[lld] 53c796b - [lld-macho] Properly handle & validate relocation r_length

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 14 18:24:19 PDT 2020


Author: Jez Ng
Date: 2020-06-14T16:35:23-07:00
New Revision: 53c796b948f04755f67e00ac45de91d45df2b656

URL: https://github.com/llvm/llvm-project/commit/53c796b948f04755f67e00ac45de91d45df2b656
DIFF: https://github.com/llvm/llvm-project/commit/53c796b948f04755f67e00ac45de91d45df2b656.diff

LOG: [lld-macho] Properly handle & validate relocation r_length

Summary:
We should be reading / writing our addends / relocated addresses based on
r_length, and not just based on the type of the relocation. But since only
some r_length values are valid for a given reloc type, I've also added some
validation.

ld64 has code to allow for r_length = 0 in X86_64_RELOC_BRANCH relocs, but I'm
not sure how to create such a relocation...

Reviewed By: smeenai

Differential Revision: https://reviews.llvm.org/D80854

Added: 
    lld/test/MachO/invalid/invalid-relocation-length.yaml
    lld/test/MachO/invalid/invalid-relocation-pcrel.yaml

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/Target.h
    lld/test/MachO/x86-64-reloc-unsigned.s

Removed: 
    lld/test/MachO/invalid/invalid-relocation.yaml


################################################################################
diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 1b58e70114e9..2326faec96e6 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -27,7 +27,7 @@ struct X86_64 : TargetInfo {
 
   uint64_t getImplicitAddend(MemoryBufferRef, const section_64 &,
                              const relocation_info &) const override;
-  void relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const override;
+  void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override;
 
   void writeStub(uint8_t *buf, const DylibSymbol &) const override;
   void writeStubHelperHeader(uint8_t *buf) const override;
@@ -48,12 +48,35 @@ static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec,
       .str();
 }
 
+static void validateLength(MemoryBufferRef mb, const section_64 &sec,
+                           const relocation_info &rel,
+                           const std::vector<uint8_t> &validLengths) {
+  if (std::find(validLengths.begin(), validLengths.end(), rel.r_length) !=
+      validLengths.end())
+    return;
+
+  std::string msg = getErrorLocation(mb, sec, rel) + ": relocations of type " +
+                    std::to_string(rel.r_type) + " must have r_length of ";
+  bool first = true;
+  for (uint8_t length : validLengths) {
+    if (!first)
+      msg += " or ";
+    first = false;
+    msg += std::to_string(length);
+  }
+  fatal(msg);
+}
+
 uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec,
                                    const relocation_info &rel) const {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   const uint8_t *loc = buf + sec.offset + rel.r_address;
   switch (rel.r_type) {
   case X86_64_RELOC_BRANCH:
+    // XXX: ld64 also supports r_length = 0 here but I'm not sure when such a
+    // relocation will actually be generated.
+    validateLength(mb, sec, rel, {2});
+    break;
   case X86_64_RELOC_SIGNED:
   case X86_64_RELOC_SIGNED_1:
   case X86_64_RELOC_SIGNED_2:
@@ -62,20 +85,35 @@ uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec,
     if (!rel.r_pcrel)
       fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " +
             std::to_string(rel.r_type) + " must be pcrel");
-    return read32le(loc);
+    validateLength(mb, sec, rel, {2});
+    break;
   case X86_64_RELOC_UNSIGNED:
     if (rel.r_pcrel)
       fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " +
             std::to_string(rel.r_type) + " must not be pcrel");
-    return read64le(loc);
+    validateLength(mb, sec, rel, {2, 3});
+    break;
   default:
     error("TODO: Unhandled relocation type " + std::to_string(rel.r_type));
     return 0;
   }
+
+  switch (rel.r_length) {
+  case 0:
+    return *loc;
+  case 1:
+    return read16le(loc);
+  case 2:
+    return read32le(loc);
+  case 3:
+    return read64le(loc);
+  default:
+    llvm_unreachable("invalid r_length");
+  }
 }
 
-void X86_64::relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const {
-  switch (type) {
+void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const {
+  switch (r.type) {
   case X86_64_RELOC_BRANCH:
   case X86_64_RELOC_SIGNED:
   case X86_64_RELOC_SIGNED_1:
@@ -83,16 +121,33 @@ void X86_64::relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const {
   case X86_64_RELOC_SIGNED_4:
   case X86_64_RELOC_GOT_LOAD:
     // These types are only used for pc-relative relocations, so offset by 4
-    // since the RIP has advanced by 4 at this point.
-    write32le(loc, val - 4);
+    // since the RIP has advanced by 4 at this point. This is only valid when
+    // r_length = 0, which is enforced by validateLength().
+    val -= 4;
     break;
   case X86_64_RELOC_UNSIGNED:
-    write64le(loc, val);
     break;
   default:
     llvm_unreachable(
         "getImplicitAddend should have flagged all unhandled relocation types");
   }
+
+  switch (r.length) {
+  case 0:
+    *loc = val;
+    break;
+  case 1:
+    write16le(loc, val);
+    break;
+  case 2:
+    write32le(loc, val);
+    break;
+  case 3:
+    write64le(loc, val);
+    break;
+  default:
+    llvm_unreachable("invalid r_length");
+  }
 }
 
 // The following methods emit a number of assembly sequences with RIP-relative

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 1b299e9250d9..af80d528e513 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -178,6 +178,7 @@ void InputFile::parseRelocations(const section_64 &sec,
     Reloc r;
     r.type = rel.r_type;
     r.pcrel = rel.r_pcrel;
+    r.length = rel.r_length;
     uint64_t rawAddend = target->getImplicitAddend(mb, sec, rel);
 
     if (rel.r_extern) {

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 894f5ffabaf4..3dce2aeb0139 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -46,6 +46,6 @@ void InputSection::writeTo(uint8_t *buf) {
     uint64_t val = va + r.addend;
     if (r.pcrel)
       val -= getVA() + r.offset;
-    target->relocateOne(buf + r.offset, r.type, val);
+    target->relocateOne(buf + r.offset, r, val);
   }
 }

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 5c21b83d74a5..10250200bb18 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -25,6 +25,7 @@ class Symbol;
 struct Reloc {
   uint8_t type;
   bool pcrel;
+  uint8_t length;
   // The offset from the start of the subsection that this relocation belongs
   // to.
   uint32_t offset;

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index b677878504fe..74df6a951a90 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -39,7 +39,7 @@ class TargetInfo {
   virtual uint64_t
   getImplicitAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
                     const llvm::MachO::relocation_info &) const = 0;
-  virtual void relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const = 0;
+  virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const = 0;
 
   // Write code for lazy binding. See the comments on StubsSection for more
   // details.

diff  --git a/lld/test/MachO/invalid/invalid-relocation-length.yaml b/lld/test/MachO/invalid/invalid-relocation-length.yaml
new file mode 100644
index 000000000000..74ff1485adb9
--- /dev/null
+++ b/lld/test/MachO/invalid/invalid-relocation-length.yaml
@@ -0,0 +1,99 @@
+# REQUIRES: x86
+# RUN: yaml2obj %s -o %t.o
+# RUN: not lld -flavor darwinnew -arch x86_64 -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o
+#
+# CHECK: error: invalid relocation at offset 1 of __TEXT,__text in [[FILE]]: relocations of type 0 must have r_length of 2 or 3
+
+!mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           4
+  sizeofcmds:      280
+  flags:           0x00000000
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         ''
+    vmaddr:          0
+    vmsize:          9
+    fileoff:         312
+    filesize:        9
+    maxprot:         7
+    initprot:        7
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        size:            9
+        offset:          0x00000138
+        align:           0
+        reloff:          0x00000144
+        nreloc:          1
+        flags:           0x80000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+        content:         '000000000000000000'
+        relocations:
+          - address:         0x00000001
+            symbolnum:       1
+            pcrel:           false
+            length:          1
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           659200
+    sdk:             0
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          332
+    nsyms:           2
+    stroff:          364
+    strsize:         12
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      2
+    iundefsym:       2
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+LinkEditData:
+  NameList:
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         1
+    - n_strx:          6
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         9
+  StringTable:
+    - ''
+    - _foo
+    - _main

diff  --git a/lld/test/MachO/invalid/invalid-relocation.yaml b/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml
similarity index 100%
rename from lld/test/MachO/invalid/invalid-relocation.yaml
rename to lld/test/MachO/invalid/invalid-relocation-pcrel.yaml

diff  --git a/lld/test/MachO/x86-64-reloc-unsigned.s b/lld/test/MachO/x86-64-reloc-unsigned.s
index 69c8a234b4ea..52a3d536139c 100644
--- a/lld/test/MachO/x86-64-reloc-unsigned.s
+++ b/lld/test/MachO/x86-64-reloc-unsigned.s
@@ -5,7 +5,7 @@
 # CHECK: Contents of section foo:
 # CHECK:  100001000 08100000 01000000
 # CHECK: Contents of section bar:
-# CHECK:  100001008 11211111 02000000
+# CHECK:  100001008 011000f0 11211111 02000000
 
 .globl _main, _foo, _bar
 
@@ -15,7 +15,14 @@ _foo:
 
 .section __DATA,bar
 _bar:
-## The unsigned relocation should support 64-bit addends
+## We create a .int symbol reference here -- with non-zero data immediately
+## after -- to check that lld reads precisely 32 bits (and not more) of the
+## implicit addend when handling unsigned relocations of r_length = 2.
+## Note that __PAGEZERO occupies the lower 32 bits, so all symbols are above
+## that. To get a final relocated address that fits within 32 bits, we need to
+## subtract an offset here.
+.int _foo - 0x0fffffff
+## The unsigned relocation should support 64-bit addends too (r_length = 3).
 .quad _foo + 0x111111111
 
 .text


        


More information about the llvm-commits mailing list