[lld] d1e40f4 - [lld-macho] Add LOH_ARM64_ADRP_ADD_LDR optimization hint support
Daniel Bertalan via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 28 01:46:48 PDT 2022
Author: Daniel Bertalan
Date: 2022-07-28T10:45:28+02:00
New Revision: d1e40f4d584fc6dd032938c6c5fb079f6b8121bd
URL: https://github.com/llvm/llvm-project/commit/d1e40f4d584fc6dd032938c6c5fb079f6b8121bd
DIFF: https://github.com/llvm/llvm-project/commit/d1e40f4d584fc6dd032938c6c5fb079f6b8121bd.diff
LOG: [lld-macho] Add LOH_ARM64_ADRP_ADD_LDR optimization hint support
This hint instructs the linker to optimize an adrp+add+ldr sequence used
for loading from a local symbol's address by loading directly if it's
close enough, or with an adr(p)+ldr sequence if it's not.
This transformation is the same as what's done for ADRP_LDR_GOT_LDR when
the symbol is local. The logic for acting on this hint is therefore
moved to a new function which will be called from the existing
applyAdrpLdrGotLdr() function.
Differential Revision: https://reviews.llvm.org/D130505
Added:
lld/test/MachO/loh-adrp-add-ldr.s
Modified:
lld/MachO/Arch/ARM64.cpp
Removed:
################################################################################
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 46e935aa1fd1f..5999d9b0df587 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -184,6 +184,7 @@ class OptimizationHintContext {
void applyAdrpAdrp(const OptimizationHint &);
void applyAdrpLdr(const OptimizationHint &);
void applyAdrpLdrGot(const OptimizationHint &);
+ void applyAdrpAddLdr(const OptimizationHint &);
void applyAdrpLdrGotLdr(const OptimizationHint &);
private:
@@ -467,80 +468,109 @@ void OptimizationHintContext::applyAdrpLdrGot(const OptimizationHint &hint) {
applyAdrpLdr(hint);
}
-// Relaxes a GOT-indirect load.
-// If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
-// the GOT entry can be loaded with a single literal ldr instruction.
-// If the referenced symbol is local, its address may be loaded directly if it's
-// close enough, or with an adr(p) + ldr pair if it's not.
-void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) {
+// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
+// address by loading directly if it's close enough, or with an adr(p)+ldr
+// sequence if it's not.
+//
+// adrp x0, _foo at PAGE
+// add x1, x0, _foo at PAGEOFF
+// ldr x2, [x1, #off]
+void OptimizationHintContext::applyAdrpAddLdr(const OptimizationHint &hint) {
uint32_t ins1 = read32le(buf + hint.offset0);
Adrp adrp;
if (!parseAdrp(ins1, adrp))
return;
+ uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
+ Add add;
+ if (!parseAdd(ins2, add))
+ return;
uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]);
- Ldr ldr3;
- if (!parseLdr(ins3, ldr3))
+ Ldr ldr;
+ if (!parseLdr(ins3, ldr))
return;
- uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
- Ldr ldr2;
- Add add2;
Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
if (!rel1 || !rel2)
return;
- if (parseAdd(ins2, add2)) {
- // adrp x0, _foo at PAGE
- // add x1, x0, _foo at PAGEOFF
- // ldr x2, [x1, #off]
+ if (adrp.destRegister != add.srcRegister)
+ return;
+ if (add.destRegister != ldr.baseRegister)
+ return;
- if (adrp.destRegister != add2.srcRegister)
- return;
- if (add2.destRegister != ldr3.baseRegister)
- return;
+ // Load from the target address directly.
+ // nop
+ // nop
+ // ldr x2, [_foo + #off]
+ uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA();
+ Ldr literalLdr = ldr;
+ literalLdr.offset += rel1->referentVA - rel3VA;
+ if (isLiteralLdrEligible(literalLdr)) {
+ writeNop(buf + hint.offset0);
+ writeNop(buf + hint.offset0 + hint.delta[0]);
+ writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr);
+ return;
+ }
- // Load from the target address directly.
- // nop
- // nop
- // ldr x2, [_foo + #off]
- uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA();
- Ldr literalLdr = ldr3;
- literalLdr.offset += rel1->referentVA - rel3VA;
- if (isLiteralLdrEligible(literalLdr)) {
- writeNop(buf + hint.offset0);
- writeNop(buf + hint.offset0 + hint.delta[0]);
- writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr);
- return;
- }
+ // Load the target address into a register and load from there indirectly.
+ // adr x1, _foo
+ // nop
+ // ldr x2, [x1, #off]
+ int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA();
+ if (isValidAdrOffset(adrOffset)) {
+ writeAdr(buf + hint.offset0, ldr.baseRegister, adrOffset);
+ // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
+ // not for AdrpLdrGotLdr. Its effect is the same either way.
+ writeNop(buf + hint.offset0 + hint.delta[0]);
+ return;
+ }
- // Load the target address into a register and load from there indirectly.
- // adr x1, _foo
- // nop
- // ldr x2, [x1, #off]
- int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA();
- if (isValidAdrOffset(adrOffset)) {
- writeAdr(buf + hint.offset0, ldr3.baseRegister, adrOffset);
- writeNop(buf + hint.offset0 + hint.delta[0]);
- return;
- }
+ // Move the target's page offset into the ldr's immediate offset.
+ // adrp x0, _foo at PAGE
+ // nop
+ // ldr x2, [x0, _foo at PAGEOFF + #off]
+ Ldr immediateLdr = ldr;
+ immediateLdr.baseRegister = adrp.destRegister;
+ immediateLdr.offset += add.addend;
+ if (isImmediateLdrEligible(immediateLdr)) {
+ writeNop(buf + hint.offset0 + hint.delta[0]);
+ writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr);
+ return;
+ }
+}
- // Move the target's page offset into the ldr's immediate offset.
- // adrp x0, _foo at PAGE
- // nop
- // ldr x2, [x0, _foo at PAGEOFF + #off]
- Ldr immediateLdr = ldr3;
- immediateLdr.baseRegister = adrp.destRegister;
- immediateLdr.offset += add2.addend;
- if (isImmediateLdrEligible(immediateLdr)) {
- writeNop(buf + hint.offset0 + hint.delta[0]);
- writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr);
- return;
- }
+// Relaxes a GOT-indirect load.
+// If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
+// the GOT entry can be loaded with a single literal ldr instruction.
+// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
+// we perform the AdrpAddLdr transformation.
+void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) {
+ uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
+ Add add;
+ Ldr ldr2;
+
+ if (parseAdd(ins2, add)) {
+ applyAdrpAddLdr(hint);
} else if (parseLdr(ins2, ldr2)) {
// adrp x1, _foo at GOTPAGE
// ldr x2, [x1, _foo at GOTPAGEOFF]
// ldr x3, [x2, #off]
+
+ uint32_t ins1 = read32le(buf + hint.offset0);
+ Adrp adrp;
+ if (!parseAdrp(ins1, adrp))
+ return;
+ uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]);
+ Ldr ldr3;
+ if (!parseLdr(ins3, ldr3))
+ return;
+
+ Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
+ Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
+ if (!rel1 || !rel2)
+ return;
+
if (ldr2.baseRegister != adrp.destRegister)
return;
if (ldr3.baseRegister != ldr2.destRegister)
@@ -581,7 +611,7 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
ctx1.applyAdrpLdr(hint);
break;
case LOH_ARM64_ADRP_ADD_LDR:
- // TODO: Implement this
+ ctx1.applyAdrpAddLdr(hint);
break;
case LOH_ARM64_ADRP_LDR_GOT_LDR:
ctx1.applyAdrpLdrGotLdr(hint);
diff --git a/lld/test/MachO/loh-adrp-add-ldr.s b/lld/test/MachO/loh-adrp-add-ldr.s
new file mode 100644
index 0000000000000..c844917195613
--- /dev/null
+++ b/lld/test/MachO/loh-adrp-add-ldr.s
@@ -0,0 +1,185 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+
+## This is mostly a copy of loh-adrp-ldr-got-ldr.s's `local.s` test, except that Adrp+Ldr+Ldr
+## triples have been changed to Adrp+Add+Ldr. The performed optimization is the same.
+.text
+.align 2
+.globl _main
+_main:
+
+### Transformation to a literal LDR
+## Basic case
+L1: adrp x0, _close at PAGE
+L2: add x1, x0, _close at PAGEOFF
+L3: ldr x2, [x1]
+# CHECK-LABEL: _main:
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+## Load with offset
+L4: adrp x0, _close at PAGE
+L5: add x1, x0, _close at PAGEOFF
+L6: ldr x2, [x1, #8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+## 32 bit load
+L7: adrp x0, _close at PAGE
+L8: add x1, x0, _close at PAGEOFF
+L9: ldr w1, [x1]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr w1, _close
+
+## Floating point
+L10: adrp x0, _close at PAGE
+L11: add x1, x0, _close at PAGEOFF
+L12: ldr s1, [x1]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr s1, _close
+
+L13: adrp x0, _close at PAGE
+L14: add x1, x0, _close at PAGEOFF
+L15: ldr d1, [x1, #8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr d1, _close8
+
+L16: adrp x0, _close at PAGE
+L17: add x1, x0, _close at PAGEOFF
+L18: ldr q0, [x1]
+# CHECK-NEXT: nop
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr q0, _close
+
+
+### Transformation to ADR+LDR
+## 1 byte floating point load
+L19: adrp x0, _close at PAGE
+L20: add x1, x0, _close at PAGEOFF
+L21: ldr b2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr b2, [x1]
+
+## 1 byte GPR load, zero extend
+L22: adrp x0, _close at PAGE
+L23: add x1, x0, _close at PAGEOFF
+L24: ldrb w2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrb w2, [x1]
+
+## 1 byte GPR load, sign extend
+L25: adrp x0, _close at PAGE
+L26: add x1, x0, _close at PAGEOFF
+L27: ldrsb x2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrsb x2, [x1]
+
+## Unaligned
+L28: adrp x0, _unaligned at PAGE
+L29: add x1, x0, _close at PAGEOFF
+L30: ldr x2, [x1]
+# CHECK-NEXT: adr x1
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2, [x1]
+
+
+### Transformation to ADRP + immediate LDR
+## Basic test: target is far
+L31: adrp x0, _far at PAGE
+L32: add x1, x0, _far at PAGEOFF
+L33: ldr x2, [x1]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+## With offset
+L34: adrp x0, _far at PAGE
+L35: add x1, x0, _far at PAGEOFF
+L36: ldr x2, [x1, #8]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x2
+
+### No changes
+## Far and unaligned
+L37: adrp x0, _far_unaligned at PAGE
+L38: add x1, x0, _far_unaligned at PAGEOFF
+L39: ldr x2, [x1]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldr x2, [x1]
+
+## Far with large offset (_far_offset at PAGEOFF + #255 > 4095)
+L40: adrp x0, _far_offset at PAGE
+L41: add x1, x0, _far_offset at PAGEOFF
+L42: ldrb w2, [x1, #255]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldrb w2, [x1, #255]
+
+### Invalid inputs; the instructions should be left untouched.
+## Registers don't match
+L43: adrp x0, _far at PAGE
+L44: add x1, x0, _far at PAGEOFF
+L45: ldr x2, [x2]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldr x2, [x2]
+
+## Targets don't match
+L46: adrp x0, _close at PAGE
+L47: add x1, x0, _close8 at PAGEOFF
+L48: ldr x2, [x1]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: add x1, x0
+# CHECK-NEXT: ldr x2, [x1]
+
+.data
+.align 4
+ .quad 0
+_close:
+ .quad 0
+_close8:
+ .quad 0
+ .byte 0
+_unaligned:
+ .quad 0
+
+.space 1048576
+.align 12
+ .quad 0
+_far:
+ .quad 0
+ .byte 0
+_far_unaligned:
+ .quad 0
+.space 4000
+_far_offset:
+ .byte 0
+
+.loh AdrpAddLdr L1, L2, L3
+.loh AdrpAddLdr L4, L5, L6
+.loh AdrpAddLdr L7, L8, L9
+.loh AdrpAddLdr L10, L11, L12
+.loh AdrpAddLdr L13, L14, L15
+.loh AdrpAddLdr L16, L17, L18
+.loh AdrpAddLdr L19, L20, L21
+.loh AdrpAddLdr L22, L23, L24
+.loh AdrpAddLdr L25, L26, L27
+.loh AdrpAddLdr L28, L29, L30
+.loh AdrpAddLdr L31, L32, L33
+.loh AdrpAddLdr L34, L35, L36
+.loh AdrpAddLdr L37, L38, L39
+.loh AdrpAddLdr L40, L41, L42
+.loh AdrpAddLdr L43, L44, L45
More information about the llvm-commits
mailing list