[lld] 573c7e6 - [lld-macho] Handle LOH_ARM64_ADRP_LDR linker optimization hints
Daniel Bertalan via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 1 00:48:17 PDT 2022
Author: Daniel Bertalan
Date: 2022-07-01T09:44:24+02:00
New Revision: 573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4
URL: https://github.com/llvm/llvm-project/commit/573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4
DIFF: https://github.com/llvm/llvm-project/commit/573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4.diff
LOG: [lld-macho] Handle LOH_ARM64_ADRP_LDR linker optimization hints
This linker optimization hint transforms a pair of adrp+ldr (immediate)
instructions into an ldr (literal) load from a PC-relative address if
the target address is 4-byte aligned and within +/- 1 MiB, as ldr (literal)
can encode a signed 19-bit offset that gets multiplied by 4.
In the wild, only a small number of these hints are applicable because
not many loads end up close enough to the data segment. However, the
added helper functions will be useful in implementing the rest of the
LOH types.
Differential Revision: https://reviews.llvm.org/D128942
Added:
lld/test/MachO/loh-adrp-ldr.s
Modified:
lld/MachO/Arch/ARM64.cpp
Removed:
################################################################################
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 8c0c2d7e716ad..93941f425bf3b 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -163,6 +163,17 @@ struct Add {
uint32_t addend;
};
+enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 }; // Sign64/Sign32 equal the opc field values (2/3) that parseLdr casts directly into this enum
+
+struct Ldr { // Decoded fields of an ldr (immediate) instruction, as filled in by parseLdr
+ uint8_t destRegister; // bits 0-4 of the instruction
+ uint8_t baseRegister; // bits 5-9 of the instruction
+ uint8_t size; // access size in bytes
+ bool isFloat; // true for the SIMD&FP form of the load
+ ExtendType extendType; // how the loaded value is extended into the destination
+ uint64_t offset; // 12-bit immediate multiplied by size, i.e. a byte offset
+};
+
struct PerformedReloc {
const Reloc &rel;
uint64_t referentVA;
@@ -177,6 +188,7 @@ class OptimizationHintContext {
void applyAdrpAdd(const OptimizationHint &);
void applyAdrpAdrp(const OptimizationHint &);
+ void applyAdrpLdr(const OptimizationHint &);
private:
uint8_t *buf;
@@ -207,6 +219,41 @@ static bool parseAdd(uint32_t insn, Add &add) {
return true;
}
+static bool parseLdr(uint32_t insn, Ldr &ldr) { // Decodes an unsigned-offset ldr (immediate) into `ldr`; returns false for any other instruction
+ ldr.destRegister = insn & 0x1f;
+ ldr.baseRegister = (insn >> 5) & 0x1f;
+ uint8_t size = insn >> 30; // for the integer forms this is log2 of the access size
+ uint8_t opc = (insn >> 22) & 3;
+
+ if ((insn & 0x3fc00000) == 0x39400000) {
+ // LDR (immediate), LDRB (immediate), LDRH (immediate)
+ ldr.size = 1 << size;
+ ldr.extendType = ZeroExtend;
+ ldr.isFloat = false;
+ } else if ((insn & 0x3f800000) == 0x39800000 && size < 3 && !(size == 2 && opc == 3)) {
+ // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate); size 3 (PRFM) and size 2 / opc 3 (unallocated) match the mask but are not loads
+ ldr.size = 1 << size;
+ ldr.extendType = static_cast<ExtendType>(opc); // opc is 2 or 3 here, i.e. Sign64 or Sign32
+ ldr.isFloat = false;
+ } else if ((insn & 0x3f400000) == 0x3d400000) {
+ // LDR (immediate, SIMD&FP)
+ ldr.extendType = ZeroExtend;
+ ldr.isFloat = true;
+ if (size == 2 && opc == 1)
+ ldr.size = 4;
+ else if (size == 3 && opc == 1)
+ ldr.size = 8;
+ else if (size == 0 && opc == 3)
+ ldr.size = 16;
+ else
+ return false; // other size/opc combinations of the SIMD&FP form are not handled
+ } else {
+ return false;
+ }
+ ldr.offset = ((insn >> 10) & 0xfff) * ldr.size; // imm12 is scaled by the access size
+ return true;
+}
+
static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
uint32_t opcode = 0x10000000;
uint32_t immHi = (delta & 0x001ffffc) << 3;
@@ -216,6 +263,28 @@ static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
static void writeNop(void *loc) { write32le(loc, 0xd503201f); } // 0xd503201f is the AArch64 NOP encoding
+static void writeLiteralLdr(void *loc, Ldr original, int32_t delta) { // Writes an ldr (literal) at loc that loads from PC + delta, keeping the original's destination register, width and extension
+ uint32_t imm19 = (delta & 0x001ffffc) << 3; // mask before shifting: delta may be negative and left-shifting a negative value is undefined; same form as writeAdr
+ uint32_t opcode = 0;
+ switch (original.size) {
+ case 4:
+ if (original.isFloat)
+ opcode = 0x1c000000; // ldr sN, <literal>
+ else
+ opcode = original.extendType == Sign64 ? 0x98000000 : 0x18000000; // ldrsw xN vs. ldr wN
+ break;
+ case 8:
+ opcode = original.isFloat ? 0x5c000000 : 0x58000000; // ldr dN vs. ldr xN
+ break;
+ case 16:
+ opcode = 0x9c000000; // ldr qN, <literal>
+ break;
+ default:
+ assert(false && "Invalid size for literal ldr"); // callers must filter out 1- and 2-byte loads first
+ }
+ write32le(loc, opcode | imm19 | original.destRegister);
+}
+
uint64_t OptimizationHintContext::getRelocTarget(const Reloc &reloc) {
size_t relocIdx = &reloc - isec->relocs.data();
return relocTargets[relocIdx];
@@ -316,6 +385,45 @@ void OptimizationHintContext::applyAdrpAdrp(const OptimizationHint &hint) {
writeNop(buf + hint.offset0 + hint.delta[0]);
}
+// Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
+// load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
+// as ldr can encode a signed 19-bit offset that gets multiplied by 4.
+//
+// adrp xN, _foo@PAGE
+// ldr xM, [xN, _foo@PAGEOFF]
+// ->
+// nop
+// ldr xM, _foo
+void OptimizationHintContext::applyAdrpLdr(const OptimizationHint &hint) {
+ uint32_t ins1 = read32le(buf + hint.offset0); // first instruction of the pair (expected: adrp)
+ uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); // second instruction of the pair (expected: ldr)
+ Adrp adrp;
+ if (!parseAdrp(ins1, adrp))
+ return;
+ Ldr ldr;
+ if (!parseLdr(ins2, ldr))
+ return;
+ if (adrp.destRegister != ldr.baseRegister) // the ldr must use the register written by the adrp as its base
+ return;
+
+ Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0); // NOTE(review): presumably the relocation applied to the adrp; helper is defined outside this hunk
+ Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]); // NOTE(review): presumably the relocation applied to the ldr
+ if (!rel1 || !rel2)
+ return;
+ if (ldr.offset != (rel1->referentVA & 0xfff)) // the ldr's byte offset must equal the low 12 bits of the adrp's target address
+ return;
+ if ((rel1->referentVA & 3) != 0) // the target must be 4-byte aligned
+ return;
+ if (ldr.size == 1 || ldr.size == 2) // writeLiteralLdr only handles 4-, 8- and 16-byte loads
+ return;
+ int64_t delta = rel1->referentVA - rel2->rel.offset - isec->getVA(); // target address minus (rel2's offset + section VA); presumably the distance from the ldr instruction
+ if (delta >= (1 << 20) || delta < -(1 << 20)) // must fit the +/- 1 MiB range
+ return;
+
+ writeNop(buf + hint.offset0); // the adrp is replaced by a nop
+ writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr, delta);
+}
+
void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
ArrayRef<uint64_t> relocTargets) const {
assert(isec);
@@ -332,6 +440,8 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
// might cause its targets to be turned into NOPs.
break;
case LOH_ARM64_ADRP_LDR:
+ ctx1.applyAdrpLdr(hint);
+ break;
case LOH_ARM64_ADRP_ADD_LDR:
case LOH_ARM64_ADRP_LDR_GOT_LDR:
case LOH_ARM64_ADRP_ADD_STR:
diff --git a/lld/test/MachO/loh-adrp-ldr.s b/lld/test/MachO/loh-adrp-ldr.s
new file mode 100644
index 0000000000000..46e8e3a9d55e5
--- /dev/null
+++ b/lld/test/MachO/loh-adrp-ldr.s
@@ -0,0 +1,149 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+
+.text
+.align 2
+_before_far:
+ .space 1048576
+
+.align 2
+_before_near:
+ .quad 0
+
+.globl _main
+# CHECK-LABEL: _main:
+_main:
+## Out of range, before
+L1: adrp x0, _before_far@PAGE
+L2: ldr x0, [x0, _before_far@PAGEOFF]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: ldr x0
+
+## In range, before
+L3: adrp x1, _before_near@PAGE
+L4: ldr x1, [x1, _before_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x1, #-20
+
+## Registers don't match (invalid input)
+L5: adrp x2, _before_near@PAGE
+L6: ldr x3, [x3, _before_near@PAGEOFF]
+# CHECK-NEXT: adrp x2
+# CHECK-NEXT: ldr x3
+
+## Targets don't match (invalid input)
+L7: adrp x4, _before_near@PAGE
+L8: ldr x4, [x4, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x4
+# CHECK-NEXT: ldr x4
+
+## Not an adrp instruction
+L9: udf 0
+L10: ldr x5, [x5, _after_near@PAGEOFF]
+# CHECK-NEXT: udf
+# CHECK-NEXT: ldr x5
+
+## Not an ldr with an immediate offset
+L11: adrp x6, _after_near@PAGE
+L12: ldr x6, 0
+# CHECK-NEXT: adrp x6
+# CHECK-NEXT: ldr x6, #0
+
+## Target is not aligned to 4 bytes
+L13: adrp x7, _after_unaligned@PAGE
+L14: ldr x7, [x7, _after_unaligned@PAGEOFF]
+# CHECK-NEXT: adrp x7
+# CHECK-NEXT: ldr x7
+
+## Byte load, unsupported
+L15: adrp x8, _after_near@PAGE
+L16: ldr b8, [x8, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x8
+# CHECK-NEXT: ldr b8
+
+## Halfword load, unsupported
+L17: adrp x9, _after_near@PAGE
+L18: ldr h9, [x9, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x9
+# CHECK-NEXT: ldr h9
+
+## Word load
+L19: adrp x10, _after_near@PAGE
+L20: ldr w10, [x10, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr w10, _after_near
+
+## With addend
+L21: adrp x11, _after_near@PAGE + 8
+L22: ldr x11, [x11, _after_near@PAGEOFF + 8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x11
+
+## Signed 32-bit read from 8-bit value, unsupported
+L23: adrp x12, _after_near@PAGE
+L24: ldrsb w12, [x12, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x12
+# CHECK-NEXT: ldrsb w12
+
+## 64-bit load from signed 32-bit value
+L25: adrp x13, _after_near@PAGE
+L26: ldrsw x13, [x13, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrsw x13, _after_near
+
+## Single precision FP read
+L27: adrp x14, _after_near@PAGE
+L28: ldr s0, [x14, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr s0, _after_near
+
+## Double precision FP read
+L29: adrp x15, _after_near@PAGE
+L30: ldr d0, [x15, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr d0, _after_near
+
+## Quad precision FP read
+L31: adrp x16, _after_near@PAGE
+L32: ldr q0, [x16, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr q0, _after_near
+
+## Out of range, after
+L33: adrp x17, _after_far@PAGE
+L34: ldr x17, [x17, _after_far@PAGEOFF]
+# CHECK-NEXT: adrp x17
+# CHECK-NEXT: ldr x17
+
+.data
+.align 4
+_after_near:
+ .quad 0
+ .quad 0
+ .byte 0
+_after_unaligned:
+.space 1048575
+
+_after_far:
+ .quad 0
+
+.loh AdrpLdr L1, L2
+.loh AdrpLdr L3, L4
+.loh AdrpLdr L5, L6
+.loh AdrpLdr L7, L8
+.loh AdrpLdr L9, L10
+.loh AdrpLdr L11, L12
+.loh AdrpLdr L13, L14
+.loh AdrpLdr L15, L16
+.loh AdrpLdr L17, L18
+.loh AdrpLdr L19, L20
+.loh AdrpLdr L21, L22
+.loh AdrpLdr L23, L24
+.loh AdrpLdr L25, L26
+.loh AdrpLdr L27, L28
+.loh AdrpLdr L29, L30
+.loh AdrpLdr L31, L32
+.loh AdrpLdr L33, L34
More information about the llvm-commits
mailing list