[lld] a552fb2 - [lld-macho] Have relocation address included in range-check error message
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 28 18:57:06 PST 2022
Author: Jez Ng
Date: 2022-02-28T21:56:38-05:00
New Revision: a552fb2a86dba5e33bd94fbcddfd5597eb49e619
URL: https://github.com/llvm/llvm-project/commit/a552fb2a86dba5e33bd94fbcddfd5597eb49e619
DIFF: https://github.com/llvm/llvm-project/commit/a552fb2a86dba5e33bd94fbcddfd5597eb49e619.diff
LOG: [lld-macho] Have relocation address included in range-check error message
This makes it easier to debug those errors. See e.g. https://github.com/llvm/llvm-project/issues/52767#issuecomment-1028713943
We take the approach of 'reverse-engineering' the InputSection from the
output buffer offset. This provides for a cleaner Target API, and is
similar to LLD-ELF's implementation of getErrorPlace().
Reviewed By: #lld-macho, Roger
Differential Revision: https://reviews.llvm.org/D118903
Added:
Modified:
lld/MachO/Arch/ARM64Common.cpp
lld/MachO/Arch/ARM64Common.h
lld/MachO/Arch/X86_64.cpp
lld/MachO/Relocations.cpp
lld/MachO/Relocations.h
lld/MachO/SyntheticSections.h
lld/MachO/Writer.cpp
lld/test/MachO/invalid/range-check.s
Removed:
################################################################################
diff --git a/lld/MachO/Arch/ARM64Common.cpp b/lld/MachO/Arch/ARM64Common.cpp
index 67e7292fd6fdb..f55258ce8ec9f 100644
--- a/lld/MachO/Arch/ARM64Common.cpp
+++ b/lld/MachO/Arch/ARM64Common.cpp
@@ -38,56 +38,57 @@ int64_t ARM64Common::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
}
}
+static void writeValue(uint8_t *loc, const Reloc &r, uint64_t value) {
+ switch (r.length) {
+ case 2:
+ checkInt(loc, r, value, 32);
+ write32le(loc, value);
+ break;
+ case 3:
+ write64le(loc, value);
+ break;
+ default:
+ llvm_unreachable("invalid r_length");
+ }
+}
+
// For instruction relocations (load, store, add), the base
// instruction is pre-populated in the text section. A pre-populated
// instruction has opcode & register-operand bits set, with immediate
// operands zeroed. We read it from text, OR-in the immediate
// operands, then write-back the completed instruction.
-
void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
uint64_t pc) const {
+ auto loc32 = reinterpret_cast<uint32_t *>(loc);
uint32_t base = ((r.length == 2) ? read32le(loc) : 0);
switch (r.type) {
case ARM64_RELOC_BRANCH26:
- value = encodeBranch26(r, base, value - pc);
+ encodeBranch26(loc32, r, base, value - pc);
break;
case ARM64_RELOC_SUBTRACTOR:
case ARM64_RELOC_UNSIGNED:
- if (r.length == 2)
- checkInt(r, value, 32);
+ writeValue(loc, r, value);
break;
case ARM64_RELOC_POINTER_TO_GOT:
if (r.pcrel)
value -= pc;
- checkInt(r, value, 32);
+ writeValue(loc, r, value);
break;
case ARM64_RELOC_PAGE21:
case ARM64_RELOC_GOT_LOAD_PAGE21:
- case ARM64_RELOC_TLVP_LOAD_PAGE21: {
+ case ARM64_RELOC_TLVP_LOAD_PAGE21:
assert(r.pcrel);
- value = encodePage21(r, base, pageBits(value) - pageBits(pc));
+ encodePage21(loc32, r, base, pageBits(value) - pageBits(pc));
break;
- }
case ARM64_RELOC_PAGEOFF12:
case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
assert(!r.pcrel);
- value = encodePageOff12(base, value);
+ encodePageOff12(loc32, base, value);
break;
default:
llvm_unreachable("unexpected relocation type");
}
-
- switch (r.length) {
- case 2:
- write32le(loc, value);
- break;
- case 3:
- write64le(loc, value);
- break;
- default:
- llvm_unreachable("invalid r_length");
- }
}
void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const {
diff --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h
index 934101caefb99..5f7b017b2ef2a 100644
--- a/lld/MachO/Arch/ARM64Common.h
+++ b/lld/MachO/Arch/ARM64Common.h
@@ -40,16 +40,18 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
// | | imm26 |
// +-----------+---------------------------------------------------+
-inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) {
- checkInt(r, va, 28);
+inline void encodeBranch26(uint32_t *loc, const Reloc &r, uint32_t base,
+ uint64_t va) {
+ checkInt(loc, r, va, 28);
// Since branch destinations are 4-byte aligned, the 2 least-
// significant bits are 0. They are right shifted off the end.
- return (base | bitField(va, 2, 26, 0));
+ llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
}
-inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
- checkInt(d, va, 28);
- return (base | bitField(va, 2, 26, 0));
+inline void encodeBranch26(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
+ uint64_t va) {
+ checkInt(loc, d, va, 28);
+ llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
}
// 30 29 23 5
@@ -57,14 +59,18 @@ inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
// | |ilo| | immhi | |
// +-+---+---------+-------------------------------------+---------+
-inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) {
- checkInt(r, va, 35);
- return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
+inline void encodePage21(uint32_t *loc, const Reloc &r, uint32_t base,
+ uint64_t va) {
+ checkInt(loc, r, va, 35);
+ llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
+ bitField(va, 14, 19, 5));
}
-inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
- checkInt(d, va, 35);
- return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
+inline void encodePage21(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
+ uint64_t va) {
+ checkInt(loc, d, va, 35);
+ llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
+ bitField(va, 14, 19, 5));
}
// 21 10
@@ -72,7 +78,7 @@ inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
// | | imm12 | |
// +-------------------+-----------------------+-------------------+
-inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {
+inline void encodePageOff12(uint32_t *loc, uint32_t base, uint64_t va) {
int scale = 0;
if ((base & 0x3b00'0000) == 0x3900'0000) { // load/store
scale = base >> 30;
@@ -82,7 +88,8 @@ inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {
// TODO(gkm): extract embedded addend and warn if != 0
// uint64_t addend = ((base & 0x003FFC00) >> 10);
- return (base | bitField(va, scale, 12 - scale, 10));
+ llvm::support::endian::write32le(loc,
+ base | bitField(va, scale, 12 - scale, 10));
}
inline uint64_t pageBits(uint64_t address) {
@@ -99,9 +106,9 @@ inline void writeStub(uint8_t *buf8, const uint32_t stubCode[3],
pageBits(in.stubs->addr + sym.stubsIndex * stubCodeSize);
uint64_t lazyPointerVA =
in.lazyPointers->addr + sym.stubsIndex * LP::wordSize;
- buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
- pageBits(lazyPointerVA) - pcPageBits);
- buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
+ encodePage21(&buf32[0], {&sym, "stub"}, stubCode[0],
+ pageBits(lazyPointerVA) - pcPageBits);
+ encodePageOff12(&buf32[1], stubCode[1], lazyPointerVA);
buf32[2] = stubCode[2];
}
@@ -114,15 +121,15 @@ inline void writeStubHelperHeader(uint8_t *buf8,
};
uint64_t loaderVA = in.imageLoaderCache->getVA();
SymbolDiagnostic d = {nullptr, "stub header helper"};
- buf32[0] = encodePage21(d, stubHelperHeaderCode[0],
- pageBits(loaderVA) - pcPageBits(0));
- buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
+ encodePage21(&buf32[0], d, stubHelperHeaderCode[0],
+ pageBits(loaderVA) - pcPageBits(0));
+ encodePageOff12(&buf32[1], stubHelperHeaderCode[1], loaderVA);
buf32[2] = stubHelperHeaderCode[2];
uint64_t binderVA =
in.got->addr + in.stubHelper->stubBinder->gotIndex * LP::wordSize;
- buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
- pageBits(binderVA) - pcPageBits(3));
- buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
+ encodePage21(&buf32[3], d, stubHelperHeaderCode[3],
+ pageBits(binderVA) - pcPageBits(3));
+ encodePageOff12(&buf32[4], stubHelperHeaderCode[4], binderVA);
buf32[5] = stubHelperHeaderCode[5];
}
@@ -133,8 +140,8 @@ inline void writeStubHelperEntry(uint8_t *buf8,
auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
uint64_t stubHelperHeaderVA = in.stubHelper->addr;
buf32[0] = stubHelperEntryCode[0];
- buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1],
- stubHelperHeaderVA - pcVA(1));
+ encodeBranch26(&buf32[1], {&sym, "stub helper"}, stubHelperEntryCode[1],
+ stubHelperHeaderVA - pcVA(1));
buf32[2] = sym.lazyBindOffset;
}
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 7e2155801fc2f..68360eedf4937 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -102,9 +102,9 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
switch (r.length) {
case 2:
if (r.type == X86_64_RELOC_UNSIGNED)
- checkUInt(r, value, 32);
+ checkUInt(loc, r, value, 32);
else
- checkInt(r, value, 32);
+ checkInt(loc, r, value, 32);
write32le(loc, value);
break;
case 3:
@@ -127,7 +127,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
uint64_t bufOff, uint64_t destAddr) {
uint64_t rip = bufAddr + bufOff;
- checkInt(d, destAddr - rip, 32);
+ checkInt(buf, d, destAddr - rip, 32);
// For the instructions we care about, the RIP-relative address is always
// stored in the last 4 bytes of the instruction.
write32le(buf + bufOff - 4, destAddr - rip);
diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index a711157679c14..6f4b00c7acf91 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Relocations.h"
+#include "ConcatOutputSection.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@@ -38,19 +39,65 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
return valid;
}
-void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits,
- int64_t min, uint64_t max) {
+// Given an offset in the output buffer, figure out which ConcatInputSection (if
+// any) maps to it. At the same time, update the offset such that it is relative
+// to the InputSection rather than to the output buffer.
+//
+// Obtaining the InputSection allows us to have better error diagnostics.
+// However, many of our relocation-handling methods do not take the InputSection
+// as a parameter. Since we are already passing the buffer offsets to our Target
+// methods, this function allows us to emit better errors without threading an
+// additional InputSection argument through the call stack.
+//
+// This is implemented as a slow linear search through OutputSegments,
+// OutputSections, and finally the InputSections themselves. However, this
+// function should be called only on error paths, so some overhead is fine.
+static InputSection *offsetToInputSection(uint64_t *off) {
+ for (OutputSegment *seg : outputSegments) {
+ if (*off < seg->fileOff || *off >= seg->fileOff + seg->fileSize)
+ continue;
+
+ const std::vector<OutputSection *> §ions = seg->getSections();
+ size_t osecIdx = 0;
+ for (; osecIdx < sections.size(); ++osecIdx)
+ if (*off < sections[osecIdx]->fileOff)
+ break;
+ assert(osecIdx > 0);
+ // We should be only calling this function on offsets that belong to
+ // ConcatOutputSections.
+ auto *osec = cast<ConcatOutputSection>(sections[osecIdx - 1]);
+ *off -= osec->fileOff;
+
+ size_t isecIdx = 0;
+ for (; isecIdx < osec->inputs.size(); ++isecIdx) {
+ const ConcatInputSection *isec = osec->inputs[isecIdx];
+ if (*off < isec->outSecOff)
+ break;
+ }
+ assert(isecIdx > 0);
+ ConcatInputSection *isec = osec->inputs[isecIdx - 1];
+ *off -= isec->outSecOff;
+ return isec;
+ }
+ return nullptr;
+}
+
+void macho::reportRangeError(void *loc, const Reloc &r, const Twine &v,
+ uint8_t bits, int64_t min, uint64_t max) {
std::string hint;
+ uint64_t off = reinterpret_cast<const uint8_t *>(loc) - in.bufferStart;
+ const InputSection *isec = offsetToInputSection(&off);
+ std::string locStr = isec ? isec->getLocation(off) : "(invalid location)";
if (auto *sym = r.referent.dyn_cast<Symbol *>())
hint = "; references " + toString(*sym);
- // TODO: get location of reloc using something like LLD-ELF's getErrorPlace()
- error("relocation " + target->getRelocAttrs(r.type).name +
+ error(locStr + ": relocation " + target->getRelocAttrs(r.type).name +
" is out of range: " + v + " is not in [" + Twine(min) + ", " +
Twine(max) + "]" + hint);
}
-void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits,
- int64_t min, uint64_t max) {
+void macho::reportRangeError(void *loc, SymbolDiagnostic d, const Twine &v,
+ uint8_t bits, int64_t min, uint64_t max) {
+ // FIXME: should we use `loc` somehow to provide a better error message?
std::string hint;
if (d.symbol)
hint = "; references " + toString(*d.symbol);
diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index 9457465ac203d..b82ca4ebebccf 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -70,28 +70,28 @@ bool validateSymbolRelocation(const Symbol *, const InputSection *,
* v: The value the relocation is attempting to encode
* bits: The number of bits actually available to encode this relocation
*/
-void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits,
- int64_t min, uint64_t max);
+void reportRangeError(void *loc, const Reloc &, const llvm::Twine &v,
+ uint8_t bits, int64_t min, uint64_t max);
struct SymbolDiagnostic {
const Symbol *symbol;
llvm::StringRef reason;
};
-void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits,
- int64_t min, uint64_t max);
+void reportRangeError(void *loc, SymbolDiagnostic, const llvm::Twine &v,
+ uint8_t bits, int64_t min, uint64_t max);
template <typename Diagnostic>
-inline void checkInt(Diagnostic d, int64_t v, int bits) {
+inline void checkInt(void *loc, Diagnostic d, int64_t v, int bits) {
if (v != llvm::SignExtend64(v, bits))
- reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits),
+ reportRangeError(loc, d, llvm::Twine(v), bits, llvm::minIntN(bits),
llvm::maxIntN(bits));
}
template <typename Diagnostic>
-inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
+inline void checkUInt(void *loc, Diagnostic d, uint64_t v, int bits) {
if ((v >> bits) != 0)
- reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
+ reportRangeError(loc, d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
}
inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) {
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index b871477331c8d..58faa87e3059f 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -595,6 +595,7 @@ class WordLiteralSection final : public SyntheticSection {
};
struct InStruct {
+ const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
CStringSection *cStringSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 6aaefaae8cc4a..56a26f11e63cf 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1048,10 +1048,10 @@ void Writer::openFile() {
FileOutputBuffer::F_executable);
if (!bufferOrErr)
- error("failed to open " + config->outputFile + ": " +
+ fatal("failed to open " + config->outputFile + ": " +
llvm::toString(bufferOrErr.takeError()));
- else
- buffer = std::move(*bufferOrErr);
+ buffer = std::move(*bufferOrErr);
+ in.bufferStart = buffer->getBufferStart();
}
void Writer::writeSections() {
diff --git a/lld/test/MachO/invalid/range-check.s b/lld/test/MachO/invalid/range-check.s
index c25d367dd1184..2f087e38c2662 100644
--- a/lld/test/MachO/invalid/range-check.s
+++ b/lld/test/MachO/invalid/range-check.s
@@ -6,8 +6,8 @@
# RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
# RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s
-# CHECK: error: relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
-# CHECK: error: relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
+# CHECK: error: {{.*}}test.o:(symbol _main+0xd): relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
+# CHECK: error: {{.*}}test.o:(symbol _main+0x3): relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
# CHECK: error: stub is out of range: [[#]] is not in [-2147483648, 2147483647]; references _bar
# CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]
# CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]
More information about the llvm-commits
mailing list