[lld] dc8bee9 - [lld-macho] Check address ranges when applying relocations
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 12 14:26:41 PST 2021
Author: Jez Ng
Date: 2021-03-12T17:26:27-05:00
New Revision: dc8bee92658e4ac2314aa2f59123078d49463219
URL: https://github.com/llvm/llvm-project/commit/dc8bee92658e4ac2314aa2f59123078d49463219
DIFF: https://github.com/llvm/llvm-project/commit/dc8bee92658e4ac2314aa2f59123078d49463219.diff
LOG: [lld-macho] Check address ranges when applying relocations
This diff required fixing `getEmbeddedAddend` to apply sign
extension to 32-bit values. We were previously passing around wrong
64-bit addend values that became "right" after being truncated back to
32-bit.
I've also made `getEmbeddedAddend` return a signed int, which is similar
to what LLD-ELF does for its `getImplicitAddend`.
`reportRangeError`, `checkUInt`, and `checkInt` are counterparts of similar
functions in LLD-ELF.
Reviewed By: #lld-macho, thakis
Differential Revision: https://reviews.llvm.org/D98387
Added:
lld/test/MachO/invalid/range-check.s
Modified:
lld/MachO/Arch/ARM64.cpp
lld/MachO/Arch/X86_64.cpp
lld/MachO/InputFiles.cpp
lld/MachO/Relocations.cpp
lld/MachO/Relocations.h
lld/MachO/Target.h
Removed:
################################################################################
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 0d1e2f447adc..14fcee290d12 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -28,8 +28,8 @@ namespace {
struct ARM64 : TargetInfo {
ARM64();
- uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
- const relocation_info) const override;
+ int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
+ const relocation_info) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t pc) const override;
@@ -77,8 +77,8 @@ const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
return relocAttrsArray[type];
}
-uint64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
- const relocation_info rel) const {
+int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
+ const relocation_info rel) const {
if (rel.r_type != ARM64_RELOC_UNSIGNED &&
rel.r_type != ARM64_RELOC_SUBTRACTOR) {
// All other reloc types should use the ADDEND relocation to store their
@@ -91,7 +91,7 @@ uint64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
const uint8_t *loc = buf + sec.offset + rel.r_address;
switch (rel.r_length) {
case 2:
- return read32le(loc);
+ return static_cast<int32_t>(read32le(loc));
case 3:
return read64le(loc);
default:
@@ -108,18 +108,30 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
// | | imm26 |
// +-----------+---------------------------------------------------+
-inline uint64_t encodeBranch26(uint64_t base, uint64_t va) {
+inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) {
+ checkInt(r, va, 28);
// Since branch destinations are 4-byte aligned, the 2 least-
// significant bits are 0. They are right shifted off the end.
return (base | bitField(va, 2, 26, 0));
}
+inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
+ checkInt(d, va, 28);
+ return (base | bitField(va, 2, 26, 0));
+}
+
// 30 29 23 5
// +-+---+---------+-------------------------------------+---------+
// | |ilo| | immhi | |
// +-+---+---------+-------------------------------------+---------+
-inline uint64_t encodePage21(uint64_t base, uint64_t va) {
+inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) {
+ checkInt(r, va, 35);
+ return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
+}
+
+inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
+ checkInt(d, va, 35);
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
}
@@ -158,21 +170,25 @@ void ARM64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
value += r.addend;
switch (r.type) {
case ARM64_RELOC_BRANCH26:
- value = encodeBranch26(base, value - pc);
+ value = encodeBranch26(r, base, value - pc);
break;
case ARM64_RELOC_SUBTRACTOR:
case ARM64_RELOC_UNSIGNED:
+ if (r.length == 2)
+ checkInt(r, value, 32);
break;
case ARM64_RELOC_POINTER_TO_GOT:
if (r.pcrel)
value -= pc;
+ checkInt(r, value, 32);
break;
case ARM64_RELOC_PAGE21:
case ARM64_RELOC_GOT_LOAD_PAGE21:
- case ARM64_RELOC_TLVP_LOAD_PAGE21:
+ case ARM64_RELOC_TLVP_LOAD_PAGE21: {
assert(r.pcrel);
- value = encodePage21(base, pageBits(value) - pageBits(pc));
+ value = encodePage21(r, base, pageBits(value) - pageBits(pc));
break;
+ }
case ARM64_RELOC_PAGEOFF12:
case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
@@ -206,7 +222,8 @@ void ARM64::writeStub(uint8_t *buf8, const macho::Symbol &sym) const {
uint64_t pcPageBits =
pageBits(in.stubs->addr + sym.stubsIndex * sizeof(stubCode));
uint64_t lazyPointerVA = in.lazyPointers->addr + sym.stubsIndex * WordSize;
- buf32[0] = encodePage21(stubCode[0], pageBits(lazyPointerVA) - pcPageBits);
+ buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
+ pageBits(lazyPointerVA) - pcPageBits);
buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
buf32[2] = stubCode[2];
}
@@ -226,14 +243,15 @@ void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
return pageBits(in.stubHelper->addr + i * sizeof(uint32_t));
};
uint64_t loaderVA = in.imageLoaderCache->getVA();
- buf32[0] =
- encodePage21(stubHelperHeaderCode[0], pageBits(loaderVA) - pcPageBits(0));
+ SymbolDiagnostic d = {nullptr, "stub header helper"};
+ buf32[0] = encodePage21(d, stubHelperHeaderCode[0],
+ pageBits(loaderVA) - pcPageBits(0));
buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
buf32[2] = stubHelperHeaderCode[2];
uint64_t binderVA =
in.got->addr + in.stubHelper->stubBinder->gotIndex * WordSize;
- buf32[3] =
- encodePage21(stubHelperHeaderCode[3], pageBits(binderVA) - pcPageBits(3));
+ buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
+ pageBits(binderVA) - pcPageBits(3));
buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
buf32[5] = stubHelperHeaderCode[5];
}
@@ -250,8 +268,8 @@ void ARM64::writeStubHelperEntry(uint8_t *buf8, const DylibSymbol &sym,
auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
uint64_t stubHelperHeaderVA = in.stubHelper->addr;
buf32[0] = stubHelperEntryCode[0];
- buf32[1] =
- encodeBranch26(stubHelperEntryCode[1], stubHelperHeaderVA - pcVA(1));
+ buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1],
+ stubHelperHeaderVA - pcVA(1));
buf32[2] = sym.lazyBindOffset;
}
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 7a7ad0a7f0fc..d4bb841a571f 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -25,8 +25,8 @@ namespace {
struct X86_64 : TargetInfo {
X86_64();
- uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
- const relocation_info) const override;
+ int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
+ const relocation_info) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
uint64_t relocVA) const override;
@@ -77,14 +77,14 @@ static int pcrelOffset(uint8_t type) {
}
}
-uint64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
- relocation_info rel) const {
+int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
+ relocation_info rel) const {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
const uint8_t *loc = buf + sec.offset + rel.r_address;
switch (rel.r_length) {
case 2:
- return read32le(loc) + pcrelOffset(rel.r_type);
+ return static_cast<int32_t>(read32le(loc)) + pcrelOffset(rel.r_type);
case 3:
return read64le(loc) + pcrelOffset(rel.r_type);
default:
@@ -102,6 +102,10 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
switch (r.length) {
case 2:
+ if (r.type == X86_64_RELOC_UNSIGNED)
+ checkUInt(r, value, 32);
+ else
+ checkInt(r, value, 32);
write32le(loc, value);
break;
case 3:
@@ -121,9 +125,10 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
// bufAddr: The virtual address corresponding to buf[0].
// bufOff: The offset within buf of the next instruction.
// destAddr: The destination address that the current instruction references.
-static void writeRipRelative(uint8_t *buf, uint64_t bufAddr, uint64_t bufOff,
- uint64_t destAddr) {
+static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
+ uint64_t bufOff, uint64_t destAddr) {
uint64_t rip = bufAddr + bufOff;
+ checkInt(d, destAddr - rip, 32);
// For the instructions we care about, the RIP-relative address is always
// stored in the last 4 bytes of the instruction.
write32le(buf + bufOff - 4, destAddr - rip);
@@ -136,7 +141,7 @@ static constexpr uint8_t stub[] = {
void X86_64::writeStub(uint8_t *buf, const macho::Symbol &sym) const {
memcpy(buf, stub, 2); // just copy the two nonzero bytes
uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub);
- writeRipRelative(buf, stubAddr, sizeof(stub),
+ writeRipRelative({&sym, "stub"}, buf, stubAddr, sizeof(stub),
in.lazyPointers->addr + sym.stubsIndex * WordSize);
}
@@ -149,8 +154,10 @@ static constexpr uint8_t stubHelperHeader[] = {
void X86_64::writeStubHelperHeader(uint8_t *buf) const {
memcpy(buf, stubHelperHeader, sizeof(stubHelperHeader));
- writeRipRelative(buf, in.stubHelper->addr, 7, in.imageLoaderCache->getVA());
- writeRipRelative(buf, in.stubHelper->addr, 0xf,
+ SymbolDiagnostic d = {nullptr, "stub helper header"};
+ writeRipRelative(d, buf, in.stubHelper->addr, 7,
+ in.imageLoaderCache->getVA());
+ writeRipRelative(d, buf, in.stubHelper->addr, 0xf,
in.got->addr +
in.stubHelper->stubBinder->gotIndex * WordSize);
}
@@ -164,8 +171,8 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
uint64_t entryAddr) const {
memcpy(buf, stubHelperEntry, sizeof(stubHelperEntry));
write32le(buf + 1, sym.lazyBindOffset);
- writeRipRelative(buf, entryAddr, sizeof(stubHelperEntry),
- in.stubHelper->addr);
+ writeRipRelative({&sym, "stub helper"}, buf, entryAddr,
+ sizeof(stubHelperEntry), in.stubHelper->addr);
}
void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 32ef5b46730a..4ef56eaccde4 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -264,7 +264,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
// and insert them. Storing addends in the instruction stream is
// possible, but inconvenient and more costly at link time.
- uint64_t pairedAddend = 0;
+ int64_t pairedAddend = 0;
relocation_info relInfo = relInfos[i];
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
@@ -276,9 +276,9 @@ void ObjFile::parseRelocations(const section_64 &sec,
if (relInfo.r_address & R_SCATTERED)
fatal("TODO: Scattered relocations not supported");
- uint64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
+ int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
assert(!(embeddedAddend && pairedAddend));
- uint64_t totalAddend = pairedAddend + embeddedAddend;
+ int64_t totalAddend = pairedAddend + embeddedAddend;
Reloc r;
r.type = relInfo.r_type;
r.pcrel = relInfo.r_pcrel;
diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index a60dbb21ff9e..e3b4cea5123b 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -39,4 +39,24 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
return valid;
}
+void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits,
+ int64_t min, uint64_t max) {
+ std::string hint;
+ if (auto *sym = r.referent.dyn_cast<Symbol *>())
+ hint = "; references " + toString(*sym);
+ // TODO: get location of reloc using something like LLD-ELF's getErrorPlace()
+ error("relocation " + target->getRelocAttrs(r.type).name +
+ " is out of range: " + v + " is not in [" + Twine(min) + ", " +
+ Twine(max) + "]" + hint);
+}
+
+void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits,
+ int64_t min, uint64_t max) {
+ std::string hint;
+ if (d.symbol)
+ hint = "; references " + toString(*d.symbol);
+ error(d.reason + " is out of range: " + v + " is not in [" + Twine(min) +
+ ", " + Twine(max) + "]" + hint);
+}
+
const RelocAttrs macho::invalidRelocAttrs{"INVALID", RelocAttrBits::_0};
diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index 21a27aba14ac..f717c462d722 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -59,13 +59,41 @@ struct Reloc {
uint32_t offset = 0;
// Adding this offset to the address of the referent symbol or subsection
// gives the destination that this relocation refers to.
- uint64_t addend = 0;
+ int64_t addend = 0;
llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
};
bool validateSymbolRelocation(const Symbol *, const InputSection *,
const Reloc &);
+/*
+ * v: The value the relocation is attempting to encode
+ * bits: The number of bits actually available to encode this relocation
+ */
+void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits,
+ int64_t min, uint64_t max);
+
+struct SymbolDiagnostic {
+ const Symbol *symbol;
+ llvm::StringRef reason;
+};
+
+void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits,
+ int64_t min, uint64_t max);
+
+template <typename Diagnostic>
+inline void checkInt(Diagnostic d, int64_t v, int bits) {
+ if (v != llvm::SignExtend64(v, bits))
+ reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits),
+ llvm::maxIntN(bits));
+}
+
+template <typename Diagnostic>
+inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
+ if ((v >> bits) != 0)
+ reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
+}
+
extern const RelocAttrs invalidRelocAttrs;
} // namespace macho
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 8d98a73a6cca..52a2c5b44376 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -39,7 +39,7 @@ class TargetInfo {
virtual ~TargetInfo() = default;
// Validate the relocation structure and get its addend.
- virtual uint64_t
+ virtual int64_t
getEmbeddedAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
const llvm::MachO::relocation_info) const = 0;
virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
diff --git a/lld/test/MachO/invalid/range-check.s b/lld/test/MachO/invalid/range-check.s
new file mode 100644
index 000000000000..22778f427846
--- /dev/null
+++ b/lld/test/MachO/invalid/range-check.s
@@ -0,0 +1,32 @@
+# REQUIRES: x86
+
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
+# RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
+# RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s
+
+# CHECK: error: relocation UNSIGNED is out of range: 8589942792 is not in [0, 4294967295]; references _foo
+# CHECK: error: relocation GOT_LOAD is out of range: 4294974033 is not in [-2147483648, 2147483647]; references _foo
+# CHECK: error: stub is out of range: 4294974006 is not in [-2147483648, 2147483647]; references _bar
+# CHECK: error: stub helper header is out of range: 4294974005 is not in [-2147483648, 2147483647]
+# CHECK: error: stub helper header is out of range: 4294969893 is not in [-2147483648, 2147483647]
+
+#--- bar.s
+.globl _bar
+_bar:
+
+#--- test.s
+.globl _main, _foo
+
+_main:
+ movq _foo at GOTPCREL(%rip), %rax
+ callq _bar
+ ret
+
+.int _foo
+.zerofill __TEXT,bss,_zero,0xffffffff
+
+.data
+_foo:
+ .space 0
More information about the llvm-commits
mailing list