[lld] d1e40f4 - [lld-macho] Add LOH_ARM64_ADRP_ADD_LDR optimization hint support

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 28 01:46:48 PDT 2022


Author: Daniel Bertalan
Date: 2022-07-28T10:45:28+02:00
New Revision: d1e40f4d584fc6dd032938c6c5fb079f6b8121bd

URL: https://github.com/llvm/llvm-project/commit/d1e40f4d584fc6dd032938c6c5fb079f6b8121bd
DIFF: https://github.com/llvm/llvm-project/commit/d1e40f4d584fc6dd032938c6c5fb079f6b8121bd.diff

LOG: [lld-macho] Add LOH_ARM64_ADRP_ADD_LDR optimization hint support

This hint instructs the linker to optimize an adrp+add+ldr sequence used
for loading from a local symbol's address by loading directly if it's
close enough, or with an adr(p)+ldr sequence if it's not.

This transformation is the same as what's done for ADRP_LDR_GOT_LDR when
the symbol is local. The logic for acting on this hint is therefore
moved to a new function which will be called from the existing
applyAdrpLdrGotLdr() function.

Differential Revision: https://reviews.llvm.org/D130505

Added: 
    lld/test/MachO/loh-adrp-add-ldr.s

Modified: 
    lld/MachO/Arch/ARM64.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 46e935aa1fd1f..5999d9b0df587 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -184,6 +184,7 @@ class OptimizationHintContext {
   void applyAdrpAdrp(const OptimizationHint &);
   void applyAdrpLdr(const OptimizationHint &);
   void applyAdrpLdrGot(const OptimizationHint &);
+  void applyAdrpAddLdr(const OptimizationHint &);
   void applyAdrpLdrGotLdr(const OptimizationHint &);
 
 private:
@@ -467,80 +468,109 @@ void OptimizationHintContext::applyAdrpLdrGot(const OptimizationHint &hint) {
     applyAdrpLdr(hint);
 }
 
-// Relaxes a GOT-indirect load.
-// If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
-// the GOT entry can be loaded with a single literal ldr instruction.
-// If the referenced symbol is local, its address may be loaded directly if it's
-// close enough, or with an adr(p) + ldr pair if it's not.
-void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) {
+// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
+// address by loading directly if it's close enough, or to an adr(p)+ldr
+// sequence if it's not.
+//
+//   adrp x0, _foo at PAGE
+//   add  x1, x0, _foo at PAGEOFF
+//   ldr  x2, [x1, #off]
+void OptimizationHintContext::applyAdrpAddLdr(const OptimizationHint &hint) {
   uint32_t ins1 = read32le(buf + hint.offset0);
   Adrp adrp;
   if (!parseAdrp(ins1, adrp))
     return;
+  uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
+  Add add;
+  if (!parseAdd(ins2, add))
+    return;
   uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]);
-  Ldr ldr3;
-  if (!parseLdr(ins3, ldr3))
+  Ldr ldr;
+  if (!parseLdr(ins3, ldr))
     return;
-  uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
-  Ldr ldr2;
-  Add add2;
 
   Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
   Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
   if (!rel1 || !rel2)
     return;
 
-  if (parseAdd(ins2, add2)) {
-    // adrp x0, _foo at PAGE
-    // add  x1, x0, _foo at PAGEOFF
-    // ldr  x2, [x1, #off]
+  if (adrp.destRegister != add.srcRegister)
+    return;
+  if (add.destRegister != ldr.baseRegister)
+    return;
 
-    if (adrp.destRegister != add2.srcRegister)
-      return;
-    if (add2.destRegister != ldr3.baseRegister)
-      return;
+  // Load from the target address directly.
+  //   nop
+  //   nop
+  //   ldr x2, [_foo + #off]
+  uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA();
+  Ldr literalLdr = ldr;
+  literalLdr.offset += rel1->referentVA - rel3VA;
+  if (isLiteralLdrEligible(literalLdr)) {
+    writeNop(buf + hint.offset0);
+    writeNop(buf + hint.offset0 + hint.delta[0]);
+    writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr);
+    return;
+  }
 
-    // Load from the target address directly.
-    //   nop
-    //   nop
-    //   ldr x2, [_foo + #off]
-    uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA();
-    Ldr literalLdr = ldr3;
-    literalLdr.offset += rel1->referentVA - rel3VA;
-    if (isLiteralLdrEligible(literalLdr)) {
-      writeNop(buf + hint.offset0);
-      writeNop(buf + hint.offset0 + hint.delta[0]);
-      writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr);
-      return;
-    }
+  // Load the target address into a register and load from there indirectly.
+  //   adr x1, _foo
+  //   nop
+  //   ldr x2, [x1, #off]
+  int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA();
+  if (isValidAdrOffset(adrOffset)) {
+    writeAdr(buf + hint.offset0, ldr.baseRegister, adrOffset);
+    // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
+    // not for AdrpLdrGotLdr. Its effect is the same either way.
+    writeNop(buf + hint.offset0 + hint.delta[0]);
+    return;
+  }
 
-    // Load the target address into a register and load from there indirectly.
-    //   adr x1, _foo
-    //   nop
-    //   ldr x2, [x1, #off]
-    int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA();
-    if (isValidAdrOffset(adrOffset)) {
-      writeAdr(buf + hint.offset0, ldr3.baseRegister, adrOffset);
-      writeNop(buf + hint.offset0 + hint.delta[0]);
-      return;
-    }
+  // Move the target's page offset into the ldr's immediate offset.
+  //   adrp x0, _foo at PAGE
+  //   nop
+  //   ldr x2, [x0, _foo at PAGEOFF + #off]
+  Ldr immediateLdr = ldr;
+  immediateLdr.baseRegister = adrp.destRegister;
+  immediateLdr.offset += add.addend;
+  if (isImmediateLdrEligible(immediateLdr)) {
+    writeNop(buf + hint.offset0 + hint.delta[0]);
+    writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr);
+    return;
+  }
+}
 
-    // Move the target's page offset into the ldr's immediate offset.
-    //   adrp x0, _foo at PAGE
-    //   nop
-    //   ldr x2, [x0, _foo at PAGEOFF + #off]
-    Ldr immediateLdr = ldr3;
-    immediateLdr.baseRegister = adrp.destRegister;
-    immediateLdr.offset += add2.addend;
-    if (isImmediateLdrEligible(immediateLdr)) {
-      writeNop(buf + hint.offset0 + hint.delta[0]);
-      writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr);
-      return;
-    }
+// Relaxes a GOT-indirect load.
+// If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
+// the GOT entry can be loaded with a single literal ldr instruction.
+// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
+// we perform the AdrpAddLdr transformation.
+void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) {
+  uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
+  Add add;
+  Ldr ldr2;
+
+  if (parseAdd(ins2, add)) {
+    applyAdrpAddLdr(hint);
   } else if (parseLdr(ins2, ldr2)) {
     // adrp x1, _foo at GOTPAGE
     // ldr  x2, [x1, _foo at GOTPAGEOFF]
     // ldr  x3, [x2, #off]
+
+    uint32_t ins1 = read32le(buf + hint.offset0);
+    Adrp adrp;
+    if (!parseAdrp(ins1, adrp))
+      return;
+    uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]);
+    Ldr ldr3;
+    if (!parseLdr(ins3, ldr3))
+      return;
+
+    Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
+    Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
+    if (!rel1 || !rel2)
+      return;
+
     if (ldr2.baseRegister != adrp.destRegister)
       return;
     if (ldr3.baseRegister != ldr2.destRegister)
@@ -581,7 +611,7 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
       ctx1.applyAdrpLdr(hint);
       break;
     case LOH_ARM64_ADRP_ADD_LDR:
-      // TODO: Implement this
+      ctx1.applyAdrpAddLdr(hint);
       break;
     case LOH_ARM64_ADRP_LDR_GOT_LDR:
       ctx1.applyAdrpLdrGotLdr(hint);

diff  --git a/lld/test/MachO/loh-adrp-add-ldr.s b/lld/test/MachO/loh-adrp-add-ldr.s
new file mode 100644
index 0000000000000..c844917195613
--- /dev/null
+++ b/lld/test/MachO/loh-adrp-add-ldr.s
@@ -0,0 +1,185 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+
+## This is mostly a copy of loh-adrp-ldr-got-ldr.s's `local.s` test, except that Adrp+Ldr+Ldr
+## triples have been changed to Adrp+Add+Ldr. The performed optimization is the same.
+.text
+.align 2
+.globl _main
+_main:
+
+### Transformation to a literal LDR
+## Basic case
+L1: adrp x0, _close at PAGE
+L2: add  x1, x0, _close at PAGEOFF
+L3: ldr  x2, [x1]
+# CHECK-LABEL: _main:
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+## Load with offset
+L4: adrp x0, _close at PAGE
+L5: add x1, x0, _close at PAGEOFF
+L6: ldr  x2, [x1, #8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+## 32 bit load
+L7: adrp x0, _close at PAGE
+L8: add  x1, x0, _close at PAGEOFF
+L9: ldr  w1, [x1]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr w1, _close
+
+## Floating point
+L10: adrp x0, _close at PAGE
+L11: add  x1, x0, _close at PAGEOFF
+L12: ldr  s1, [x1]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr s1, _close
+
+L13: adrp x0, _close at PAGE
+L14: add  x1, x0, _close at PAGEOFF
+L15: ldr  d1, [x1, #8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr d1, _close8
+
+L16: adrp x0, _close at PAGE
+L17: add  x1, x0, _close at PAGEOFF
+L18: ldr  q0, [x1]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr q0, _close
+
+
+### Transformation to ADR+LDR
+## 1 byte floating point load
+L19: adrp x0, _close at PAGE
+L20: add  x1, x0, _close at PAGEOFF
+L21: ldr  b2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr b2, [x1]
+
+## 1 byte GPR load, zero extend
+L22: adrp x0, _close at PAGE
+L23: add  x1, x0, _close at PAGEOFF
+L24: ldrb w2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrb w2, [x1]
+
+## 1 byte GPR load, sign extend
+L25: adrp  x0, _close at PAGE
+L26: add   x1, x0, _close at PAGEOFF
+L27: ldrsb x2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrsb x2, [x1]
+
+## Unaligned
+L28: adrp x0, _unaligned at PAGE
+L29: add  x1, x0, _close at PAGEOFF
+L30: ldr  x2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2, [x1]
+
+
+### Transformation to ADRP + immediate LDR
+## Basic test: target is far
+L31: adrp x0, _far at PAGE
+L32: add  x1, x0, _far at PAGEOFF
+L33: ldr  x2, [x1]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+## With offset
+L34: adrp x0, _far at PAGE
+L35: add  x1, x0, _far at PAGEOFF
+L36: ldr  x2, [x1, #8]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+### No changes
+## Far and unaligned
+L37: adrp x0, _far_unaligned at PAGE
+L38: add  x1, x0, _far_unaligned at PAGEOFF
+L39: ldr  x2, [x1]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldr x2, [x1]
+
+## Far with large offset (_far_offset at PAGE + #255 > 4095)
+L40: adrp x0, _far_offset at PAGE
+L41: add  x1, x0, _far_offset at PAGEOFF
+L42: ldrb w2, [x1, #255]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldrb w2, [x1, #255]
+
+### Invalid inputs; the instructions should be left untouched.
+## Registers don't match
+L43: adrp x0, _far at PAGE
+L44: add  x1, x0, _far at PAGEOFF
+L45: ldr  x2, [x2]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldr x2, [x2]
+
+## Targets don't match
+L46: adrp x0, _close at PAGE
+L47: add  x1, x0, _close8 at PAGEOFF
+L48: ldr  x2, [x1]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldr x2, [x1]
+
+.data
+.align 4
+    .quad 0
+_close:
+    .quad 0
+_close8:
+    .quad 0
+    .byte 0
+_unaligned:
+    .quad 0
+
+.space 1048576
+.align 12
+    .quad 0
+_far:
+     .quad 0
+    .byte 0
+_far_unaligned:
+    .quad 0
+.space 4000
+_far_offset:
+    .byte 0
+
+.loh AdrpAddLdr L1, L2, L3
+.loh AdrpAddLdr L4, L5, L6
+.loh AdrpAddLdr L7, L8, L9
+.loh AdrpAddLdr L10, L11, L12
+.loh AdrpAddLdr L13, L14, L15
+.loh AdrpAddLdr L16, L17, L18
+.loh AdrpAddLdr L19, L20, L21
+.loh AdrpAddLdr L22, L23, L24
+.loh AdrpAddLdr L25, L26, L27
+.loh AdrpAddLdr L28, L29, L30
+.loh AdrpAddLdr L31, L32, L33
+.loh AdrpAddLdr L34, L35, L36
+.loh AdrpAddLdr L37, L38, L39
+.loh AdrpAddLdr L40, L41, L42
+.loh AdrpAddLdr L43, L44, L45


        


More information about the llvm-commits mailing list