[lld] 573c7e6 - [lld-macho] Handle LOH_ARM64_ADRP_LDR linker optimization hints

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 1 00:48:17 PDT 2022


Author: Daniel Bertalan
Date: 2022-07-01T09:44:24+02:00
New Revision: 573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4

URL: https://github.com/llvm/llvm-project/commit/573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4
DIFF: https://github.com/llvm/llvm-project/commit/573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4.diff

LOG: [lld-macho] Handle LOH_ARM64_ADRP_LDR linker optimization hints

This linker optimization hint transforms a pair of adrp+ldr (immediate)
instructions into a single ldr (literal) load from a PC-relative address,
provided that address is 4-byte aligned and within +/- 1 MiB of the load:
ldr (literal) encodes a signed 19-bit offset that gets multiplied by 4.

In the wild, only a small number of these hints are applicable because
not many loads end up close enough to the data segment. However, the
added helper functions will be useful in implementing the rest of the
LOH types.

Differential Revision: https://reviews.llvm.org/D128942

Added: 
    lld/test/MachO/loh-adrp-ldr.s

Modified: 
    lld/MachO/Arch/ARM64.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 8c0c2d7e716ad..93941f425bf3b 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -163,6 +163,17 @@ struct Add {
   uint32_t addend;
 };
 
+enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 }; // Sign64/Sign32 equal the opc values (2 and 3) that parseLdr casts directly for sign-extending loads
+
+struct Ldr { // Decoded fields of a load instruction, as filled in by parseLdr
+  uint8_t destRegister; // bits 4:0 of the instruction
+  uint8_t baseRegister; // bits 9:5 of the instruction
+  uint8_t size; // access size in bytes
+  bool isFloat; // true only for the SIMD&FP form
+  ExtendType extendType; // ZeroExtend unless the instruction is a sign-extending load
+  uint64_t offset; // byte offset: the 12-bit immediate multiplied by size
+};
+
 struct PerformedReloc {
   const Reloc &rel;
   uint64_t referentVA;
@@ -177,6 +188,7 @@ class OptimizationHintContext {
 
   void applyAdrpAdd(const OptimizationHint &);
   void applyAdrpAdrp(const OptimizationHint &);
+  void applyAdrpLdr(const OptimizationHint &);
 
 private:
   uint8_t *buf;
@@ -207,6 +219,41 @@ static bool parseAdd(uint32_t insn, Add &add) {
   return true;
 }
 
+static bool parseLdr(uint32_t insn, Ldr &ldr) { // Decodes a load with a scaled 12-bit immediate into `ldr`; returns false for any other instruction
+  ldr.destRegister = insn & 0x1f; // written before validation, so `ldr` may be partially filled when false is returned
+  ldr.baseRegister = (insn >> 5) & 0x1f;
+  uint8_t size = insn >> 30; // top two bits of the instruction
+  uint8_t opc = (insn >> 22) & 3;
+
+  if ((insn & 0x3fc00000) == 0x39400000) {
+    // LDR (immediate), LDRB (immediate), LDRH (immediate)
+    ldr.size = 1 << size; // access size in bytes: 1, 2, 4 or 8
+    ldr.extendType = ZeroExtend;
+    ldr.isFloat = false;
+  } else if ((insn & 0x3f800000) == 0x39800000) { // bit 23 is set here, so opc is 2 or 3
+    // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate); NOTE(review): size == 3 also passes this mask, which looks like the PRFM (immediate) encoding rather than a load; confirm
+    ldr.size = 1 << size;
+    ldr.extendType = static_cast<ExtendType>(opc); // relies on Sign64 == 2 and Sign32 == 3
+    ldr.isFloat = false;
+  } else if ((insn & 0x3f400000) == 0x3d400000) {
+    // LDR (immediate, SIMD&FP)
+    ldr.extendType = ZeroExtend;
+    ldr.isFloat = true;
+    if (size == 2 && opc == 1)
+      ldr.size = 4;
+    else if (size == 3 && opc == 1)
+      ldr.size = 8;
+    else if (size == 0 && opc == 3)
+      ldr.size = 16;
+    else
+      return false; // every other size/opc combination is rejected
+  } else {
+    return false;
+  }
+  ldr.offset = ((insn >> 10) & 0xfff) * ldr.size; // the 12-bit immediate is scaled by the access size
+  return true;
+}
+
 static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
   uint32_t opcode = 0x10000000;
   uint32_t immHi = (delta & 0x001ffffc) << 3;
@@ -216,6 +263,28 @@ static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
 
 static void writeNop(void *loc) { write32le(loc, 0xd503201f); }
 
+static void writeLiteralLdr(void *loc, Ldr original, int32_t delta) { // Writes at `loc` a literal-form load chosen from original's size/isFloat/extendType, reusing its destination register
+  uint32_t imm19 = (delta << 3) & 0x00ffffe0; // puts delta / 4 into bits 23:5; NOTE(review): this left-shifts a possibly negative int32_t, whereas writeAdr masks before shifting
+  uint32_t opcode = 0;
+  switch (original.size) {
+  case 4:
+    if (original.isFloat)
+      opcode = 0x1c000000;
+    else
+      opcode = original.extendType == Sign64 ? 0x98000000 : 0x18000000; // Sign64 selects the sign-extending 4-byte form
+    break;
+  case 8:
+    opcode = original.isFloat ? 0x5c000000 : 0x58000000;
+    break;
+  case 16:
+    opcode = 0x9c000000; // parseLdr only produces size 16 for the SIMD&FP form
+    break;
+  default:
+    assert(false && "Invalid size for literal ldr"); // if assertions are compiled out, execution continues with opcode == 0
+  }
+  write32le(loc, opcode | imm19 | original.destRegister);
+}
+
 uint64_t OptimizationHintContext::getRelocTarget(const Reloc &reloc) {
   size_t relocIdx = &reloc - isec->relocs.data();
   return relocTargets[relocIdx];
@@ -316,6 +385,45 @@ void OptimizationHintContext::applyAdrpAdrp(const OptimizationHint &hint) {
   writeNop(buf + hint.offset0 + hint.delta[0]);
 }
 
+// Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
+// load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
+// as ldr can encode a signed 19-bit offset that gets multiplied by 4.
+//
+//   adrp xN, _foo@PAGE
+//   ldr  xM, [xN, _foo@PAGEOFF]
+// ->
+//   nop
+//   ldr  xM, _foo
+void OptimizationHintContext::applyAdrpLdr(const OptimizationHint &hint) { // Applies one adrp+ldr hint; every early return happens before any write, so a rejected pair is left untouched
+  uint32_t ins1 = read32le(buf + hint.offset0); // first instruction named by the hint (expected to be the adrp)
+  uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); // second instruction (expected to be the ldr)
+  Adrp adrp;
+  if (!parseAdrp(ins1, adrp))
+    return;
+  Ldr ldr;
+  if (!parseLdr(ins2, ldr))
+    return;
+  if (adrp.destRegister != ldr.baseRegister) // the load must use the register the adrp produced
+    return;
+
+  Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
+  Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
+  if (!rel1 || !rel2)
+    return;
+  if (ldr.offset != (rel1->referentVA & 0xfff)) // the load's immediate must equal the low 12 bits of the adrp's target address
+    return;
+  if ((rel1->referentVA & 3) != 0) // target must be 4-byte aligned
+    return;
+  if (ldr.size == 1 || ldr.size == 2) // byte and halfword loads are not rewritten; writeLiteralLdr handles only sizes 4, 8 and 16
+    return;
+  int64_t delta = rel1->referentVA - rel2->rel.offset - isec->getVA(); // distance from the ldr to the target; assumes rel.offset is relative to the section start (TODO confirm)
+  if (delta >= (1 << 20) || delta < -(1 << 20)) // must lie within the +/- 1 MiB range
+    return;
+
+  writeNop(buf + hint.offset0); // the adrp is no longer needed
+  writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr, delta);
+}
+
 void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
                                    ArrayRef<uint64_t> relocTargets) const {
   assert(isec);
@@ -332,6 +440,8 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
       // might cause its targets to be turned into NOPs.
       break;
     case LOH_ARM64_ADRP_LDR:
+      ctx1.applyAdrpLdr(hint);
+      break;
     case LOH_ARM64_ADRP_ADD_LDR:
     case LOH_ARM64_ADRP_LDR_GOT_LDR:
     case LOH_ARM64_ADRP_ADD_STR:

diff  --git a/lld/test/MachO/loh-adrp-ldr.s b/lld/test/MachO/loh-adrp-ldr.s
new file mode 100644
index 0000000000000..46e8e3a9d55e5
--- /dev/null
+++ b/lld/test/MachO/loh-adrp-ldr.s
@@ -0,0 +1,149 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+
+.text
+.align 2
+_before_far:
+ .space 1048576
+
+.align 2
+_before_near:
+  .quad 0
+
+.globl _main
+# CHECK-LABEL: _main:
+_main:
+## Out of range, before
+L1:  adrp  x0, _before_far@PAGE
+L2:  ldr   x0, [x0, _before_far@PAGEOFF]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: ldr x0
+
+## In range, before
+L3:  adrp  x1, _before_near@PAGE
+L4:  ldr   x1, [x1, _before_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x1, #-20
+
+## Registers don't match (invalid input)
+L5:  adrp  x2, _before_near@PAGE
+L6:  ldr   x3, [x3, _before_near@PAGEOFF]
+# CHECK-NEXT: adrp x2
+# CHECK-NEXT: ldr x3
+
+## Targets don't match (invalid input)
+L7:  adrp  x4, _before_near@PAGE
+L8:  ldr   x4, [x4, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x4
+# CHECK-NEXT: ldr x4
+
+## Not an adrp instruction
+L9:  udf   0
+L10: ldr   x5, [x5, _after_near@PAGEOFF]
+# CHECK-NEXT: udf
+# CHECK-NEXT: ldr x5
+
+## Not an ldr with an immediate offset
+L11: adrp  x6, _after_near@PAGE
+L12: ldr   x6, 0
+# CHECK-NEXT: adrp x6
+# CHECK-NEXT: ldr x6, #0
+
+## Target is not aligned to 4 bytes
+L13: adrp  x7, _after_unaligned@PAGE
+L14: ldr   x7, [x7, _after_unaligned@PAGEOFF]
+# CHECK-NEXT: adrp x7
+# CHECK-NEXT: ldr x7
+
+## Byte load, unsupported
+L15: adrp  x8, _after_near@PAGE
+L16: ldr   b8, [x8, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x8
+# CHECK-NEXT: ldr b8
+
+## Halfword load, unsupported
+L17: adrp  x9, _after_near@PAGE
+L18: ldr   h9, [x9, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x9
+# CHECK-NEXT: ldr h9
+
+## Word load
+L19: adrp  x10, _after_near@PAGE
+L20: ldr   w10, [x10, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr w10, _after_near
+
+## With addend
+L21: adrp  x11, _after_near@PAGE + 8
+L22: ldr   x11, [x11, _after_near@PAGEOFF + 8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x11
+
+## Signed 32-bit read from 8-bit value, unsupported
+L23: adrp  x12, _after_near@PAGE
+L24: ldrsb w12, [x12, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x12
+# CHECK-NEXT: ldrsb w12
+
+## 64-bit load from signed 32-bit value
+L25: adrp  x13, _after_near@PAGE
+L26: ldrsw x13, [x13, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrsw x13, _after_near
+
+## Single precision FP read
+L27: adrp  x14, _after_near@PAGE
+L28: ldr   s0, [x14, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr s0, _after_near
+
+## Double precision FP read
+L29: adrp  x15, _after_near@PAGE
+L30: ldr   d0, [x15, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr d0, _after_near
+
+## Quad precision FP read
+L31: adrp  x16, _after_near@PAGE
+L32: ldr   q0, [x16, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr q0, _after_near
+
+## Out of range, after
+L33: adrp  x17, _after_far@PAGE
+L34: ldr   x17, [x17, _after_far@PAGEOFF]
+# CHECK-NEXT: adrp x17
+# CHECK-NEXT: ldr x17
+
+.data
+.align 4
+_after_near:
+  .quad 0
+  .quad 0
+  .byte 0
+_after_unaligned:
+.space 1048575
+
+_after_far:
+  .quad 0
+
+.loh AdrpLdr L1, L2
+.loh AdrpLdr L3, L4
+.loh AdrpLdr L5, L6
+.loh AdrpLdr L7, L8
+.loh AdrpLdr L9, L10
+.loh AdrpLdr L11, L12
+.loh AdrpLdr L13, L14
+.loh AdrpLdr L15, L16
+.loh AdrpLdr L17, L18
+.loh AdrpLdr L19, L20
+.loh AdrpLdr L21, L22
+.loh AdrpLdr L23, L24
+.loh AdrpLdr L25, L26
+.loh AdrpLdr L27, L28
+.loh AdrpLdr L29, L30
+.loh AdrpLdr L31, L32
+.loh AdrpLdr L33, L34


        


More information about the llvm-commits mailing list