[lld] bcaf57c - [lld-macho] Parse relocations quickly by assuming sorted order

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 4 22:14:48 PDT 2021


Author: Jez Ng
Date: 2021-07-05T01:13:44-04:00
New Revision: bcaf57cae82500f40f2348f5ee41e57b11152825

URL: https://github.com/llvm/llvm-project/commit/bcaf57cae82500f40f2348f5ee41e57b11152825
DIFF: https://github.com/llvm/llvm-project/commit/bcaf57cae82500f40f2348f5ee41e57b11152825.diff

LOG: [lld-macho] Parse relocations quickly by assuming sorted order

clang and gcc both seem to emit relocations in reverse order of
address. That means we can match relocations to their containing
subsections in `O(relocs + subsections)` rather than the `O(relocs *
log(subsections))` that our previous binary search implementation
required.

Unfortunately, `ld -r` can still emit unsorted relocations, so we have a
fallback code path for that (less common) case.

Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:

      N           Min           Max        Median           Avg        Stddev
  x  20          4.04          4.11         4.075        4.0775   0.018027756
  +  20          3.95          4.02          3.98         3.985   0.020900768
  Difference at 95.0% confidence
          -0.0925 +/- 0.0124919
          -2.26855% +/- 0.306361%
          (Student's t, pooled s = 0.0195172)

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D105410

Added: 
    lld/test/MachO/unsorted-relocations.yaml

Modified: 
    lld/MachO/ConcatOutputSection.cpp
    lld/MachO/InputFiles.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 99e4558ab82f..87a6b3f18f48 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -241,6 +241,8 @@ void ConcatOutputSection::finalize() {
     }
     // Process relocs by ascending address, i.e., ascending offset within isec
     std::vector<Reloc> &relocs = isec->relocs;
+    // FIXME: This property does not hold for object files produced by ld64's
+    // `-r` mode.
     assert(is_sorted(relocs,
                      [](Reloc &a, Reloc &b) { return a.offset > b.offset; }));
     for (Reloc &r : reverse(relocs)) {

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index fd42f57ad378..703050157328 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -366,6 +366,7 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
   ArrayRef<relocation_info> relInfos(
       reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
 
+  auto subsecIt = subsecMap.rbegin();
   for (size_t i = 0; i < relInfos.size(); i++) {
     // Paired relocations serve as Mach-O's method for attaching a
     // supplemental datum to a primary relocation record. ELF does not
@@ -440,7 +441,24 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
       r.addend = referentOffset;
     }
 
-    InputSection *subsec = findContainingSubsection(subsecMap, &r.offset);
+    // Find the subsection that this relocation belongs to.
+    // Though not required by the Mach-O format, clang and gcc seem to emit
+    // relocations in reverse order of address, so let's take advantage of it.
+    // However, ld64 emits unsorted relocations (in `-r` mode), so we have a
+    // fallback for that uncommon case.
+    InputSection *subsec;
+    while (subsecIt != subsecMap.rend() && subsecIt->offset > r.offset)
+      ++subsecIt;
+    if (subsecIt == subsecMap.rend() ||
+        subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
+      subsec = findContainingSubsection(subsecMap, &r.offset);
+      // Now that we know the relocs are unsorted, avoid trying the 'fast path'
+      // for the other relocations.
+      subsecIt = subsecMap.rend();
+    } else {
+      subsec = subsecIt->isec;
+      r.offset -= subsecIt->offset;
+    }
     subsec->relocs.push_back(r);
 
     if (isSubtrahend) {

diff  --git a/lld/test/MachO/unsorted-relocations.yaml b/lld/test/MachO/unsorted-relocations.yaml
new file mode 100644
index 000000000000..c33b779050a7
--- /dev/null
+++ b/lld/test/MachO/unsorted-relocations.yaml
@@ -0,0 +1,106 @@
+## This tests that we can handle relocations that are not sorted by address.
+## llvm-mc isn't able to emit such a file, hence the use of yaml2obj. ld64
+## may emit files with unsorted relocations in `-r` mode, so we need to support
+## this.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: %lld -dylib -o %t %t.o
+# RUN: llvm-objdump --macho -d %t | FileCheck %s
+
+# CHECK:      _foo:
+# CHECK-NEXT:      movq    _bar(%rip), %rax
+# CHECK-NEXT: _bar:
+# CHECK-NEXT:      movq    _baz(%rip), %rax
+# CHECK-NEXT: _baz:
+# CHECK-NEXT:      movq    _foo(%rip), %rax
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x1
+  ncmds:           2
+  sizeofcmds:      280
+  flags:           0x2000
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         ''
+    vmaddr:          0
+    vmsize:          21
+    fileoff:         312
+    filesize:        21
+    maxprot:         7
+    initprot:        7
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0
+        size:            21
+        offset:          0x138
+        align:           0
+        reloff:          0x150
+        nreloc:          3
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         488B0500000000488B0500000000488B0500000000
+        relocations:
+          - address:         0x3
+            symbolnum:       1
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            1
+            scattered:       false
+            value:           0
+          - address:         0x11
+            symbolnum:       0
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            1
+            scattered:       false
+            value:           0
+          - address:         0xA
+            symbolnum:       2
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            1
+            scattered:       false
+            value:           0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          360
+    nsyms:           3
+    stroff:          408
+    strsize:         16
+LinkEditData:
+  NameList:
+    - n_strx:          11
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          6
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         7
+    - n_strx:          1
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         14
+  StringTable:
+    - ''
+    - _baz
+    - _bar
+    - _foo
+...


        


More information about the llvm-commits mailing list