[lld] 4450a2a - [lld][ELF] Add support for ADRP+ADD optimization for AArch64
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 1 22:18:13 PST 2022
Author: Alexander Shaposhnikov
Date: 2022-02-02T06:09:55Z
New Revision: 4450a2a23df0e7081ca7fee3ec641774afedc2bc
URL: https://github.com/llvm/llvm-project/commit/4450a2a23df0e7081ca7fee3ec641774afedc2bc
DIFF: https://github.com/llvm/llvm-project/commit/4450a2a23df0e7081ca7fee3ec641774afedc2bc.diff
LOG: [lld][ELF] Add support for ADRP+ADD optimization for AArch64
This diff adds support for ADRP+ADD optimization for AArch64 described in
https://github.com/ARM-software/abi-aa/commit/d2ca58c54b8e955cfef25c71822f837ae0439d73
i.e. under appropriate constraints
ADRP x0, symbol
ADD x0, x0, :lo12: symbol
can be turned into
NOP
ADR x0, symbol
Test plan: make check-all
Differential revision: https://reviews.llvm.org/D117614
Added:
lld/test/ELF/aarch64-adrp-add.s
Modified:
lld/ELF/Arch/AArch64.cpp
lld/ELF/InputSection.cpp
lld/ELF/Target.h
lld/test/ELF/aarch64-adrp-ldr-got.s
lld/test/ELF/aarch64-copy.s
lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
Removed:
################################################################################
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 784d578312d79..5789bc935b638 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -591,6 +591,55 @@ AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
safeToRelaxAdrpLdr = i == size;
}
+bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
+ const Relocation &addRel, uint64_t secAddr,
+ uint8_t *buf) const {
+ // When the address of sym is within the range of ADR then
+ // we may relax
+ // ADRP xn, sym
+ // ADD xn, xn, :lo12: sym
+ // to
+ // NOP
+ // ADR xn, sym
+ if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
+ addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
+ return false;
+ // Check if the relocations apply to consecutive instructions.
+ if (adrpRel.offset + 4 != addRel.offset)
+ return false;
+ if (adrpRel.sym != addRel.sym)
+ return false;
+ if (adrpRel.addend != 0 || addRel.addend != 0)
+ return false;
+
+ uint32_t adrpInstr = read32le(buf + adrpRel.offset);
+ uint32_t addInstr = read32le(buf + addRel.offset);
+ // Check if the first instruction is ADRP and the second instruction is ADD.
+ if ((adrpInstr & 0x9f000000) != 0x90000000 ||
+ (addInstr & 0xffc00000) != 0x91000000)
+ return false;
+ uint32_t adrpDestReg = adrpInstr & 0x1f;
+ uint32_t addDestReg = addInstr & 0x1f;
+ uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
+ if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
+ return false;
+
+ Symbol &sym = *adrpRel.sym;
+ // Check if the address difference is within 1MiB range.
+ int64_t val = sym.getVA() - (secAddr + addRel.offset);
+ if (val < -1024 * 1024 || val >= 1024 * 1024)
+ return false;
+
+ Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
+ /*addend=*/0, &sym};
+ // nop
+ write32le(buf + adrpRel.offset, 0xd503201f);
+ // adr x_<dest_reg>
+ write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
+ target->relocate(buf + adrRel.offset, adrRel, val);
+ return true;
+}
+
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
const Relocation &ldrRel, uint64_t secAddr,
uint8_t *buf) const {
@@ -657,6 +706,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
getAArch64Page(secAddr + adrpSymRel.offset),
64));
target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
+ tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
return true;
}
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 4e5b0f6859227..4b047f75ad69c 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1025,6 +1025,14 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
}
target.relocate(bufLoc, rel, targetVA);
break;
+ case R_AARCH64_PAGE_PC:
+ if (i + 1 < size && aarch64relaxer.tryRelaxAdrpAdd(
+ rel, relocations[i + 1], secAddr, buf)) {
+ ++i;
+ continue;
+ }
+ target.relocate(bufLoc, rel, targetVA);
+ break;
case R_PPC64_RELAX_GOT_PC: {
// The R_PPC64_PCREL_OPT relocation must appear immediately after
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 15330b2be258b..e002114f84394 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -227,6 +227,8 @@ class AArch64Relaxer {
public:
explicit AArch64Relaxer(ArrayRef<Relocation> relocs);
+ bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
+ uint64_t secAddr, uint8_t *buf) const;
bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
uint64_t secAddr, uint8_t *buf) const;
};
diff --git a/lld/test/ELF/aarch64-adrp-add.s b/lld/test/ELF/aarch64-adrp-add.s
new file mode 100644
index 0000000000000..3b3eb18f04cb5
--- /dev/null
+++ b/lld/test/ELF/aarch64-adrp-add.s
@@ -0,0 +1,107 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t && split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-low.t -o %t/a-low
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=OUT-OF-RANGE
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-high.t -o %t/a-high
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=OUT-OF-RANGE
+
+# OUT-OF-RANGE: adrp x30
+# OUT-OF-RANGE-NEXT: add x30, x30
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t -o %t/a-low
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=IN-RANGE-LOW
+
+# IN-RANGE-LOW: nop
+# IN-RANGE-LOW-NEXT: adr x30
+# IN-RANGE-LOW-NEXT: adrp x1
+# IN-RANGE-LOW-NEXT: add x1
+# IN-RANGE-LOW-NEXT: adrp x15
+# IN-RANGE-LOW-NEXT: add x15
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-LOW-NEXT: adrp x2
+# IN-RANGE-LOW-NEXT: add x3, x2
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-LOW-NEXT: adrp x2
+# IN-RANGE-LOW-NEXT: add x2, x3
+
+# RUN: ld.lld %t/a.o -T %t/within-adr-range-high.t -o %t/a-high
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=IN-RANGE-HIGH
+
+# IN-RANGE-HIGH: nop
+# IN-RANGE-HIGH-NEXT: adr x30
+# IN-RANGE-HIGH-NEXT: nop
+# IN-RANGE-HIGH-NEXT: adr x1
+# IN-RANGE-HIGH-NEXT: nop
+# IN-RANGE-HIGH-NEXT: adr x15
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-HIGH-NEXT: adrp x2
+# IN-RANGE-HIGH-NEXT: add x3, x2
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-HIGH-NEXT: adrp x2
+# IN-RANGE-HIGH-NEXT: add x2, x3
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t --no-relax -o %t/a
+## --no-relax disables relaxations.
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s --check-prefix=OUT-OF-RANGE
+
+## .rodata and .text are close to each other,
+## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
+## is equal to the lowest allowed value.
+#--- within-adr-range-low.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x100ffc: { *(.text) }
+}
+
+## .rodata and .text are far apart,
+## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
+## is equal to the lowest allowed value minus one.
+#--- out-of-adr-range-low.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x100ffd: { *(.text) }
+}
+
+## .rodata and .text are close to each other,
+## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
+## is equal to the highest allowed value.
+#--- within-adr-range-high.t
+SECTIONS {
+ .text 0x1000: { *(.text) }
+ .rodata 0x101003: { *(.rodata) }
+}
+
+## .rodata and .text are far apart,
+## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
+## is equal to the highest allowed value plus one.
+#--- out-of-adr-range-high.t
+SECTIONS {
+ .text 0x1000: { *(.text) }
+ .rodata 0x101004: { *(.rodata) }
+}
+
+#--- a.s
+.rodata
+x:
+.word 10
+.text
+.global _start
+_start:
+ adrp x30, x
+ add x30, x30, :lo12:x
+ adrp x1, x
+ add x1, x1, :lo12:x
+ adrp x15, x
+ add x15, x15, :lo12:x
+ adrp x2, x
+ add x3, x2, :lo12:x
+ adrp x2, x
+ add x2, x3, :lo12:x
diff --git a/lld/test/ELF/aarch64-adrp-ldr-got.s b/lld/test/ELF/aarch64-adrp-ldr-got.s
index f085f31290db5..56a90aac3876c 100644
--- a/lld/test/ELF/aarch64-adrp-ldr-got.s
+++ b/lld/test/ELF/aarch64-adrp-ldr-got.s
@@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
-# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t -o %t/a
# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
## Symbol 'x' is nonpreemptible, the relaxation should be applied.
@@ -29,8 +29,15 @@
# CHECK-NEXT: adrp x6
# CHECK-NEXT: ldr
+# RUN: ld.lld %t/a.o -T %t/within-adr-range.t -o %t/a
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck --check-prefix=ADR %s
+
+## Symbol 'x' is nonpreemptible, the relaxation should be applied.
+# ADR: nop
+# ADR-NEXT: adr x1
+
## Symbol 'x' is nonpreemptible, but --no-relax surpresses relaxations.
-# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t --no-relax -o %t/no-relax
# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
@@ -61,12 +68,20 @@
## This linker script ensures that .rodata and .text are sufficiently (>1M)
## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
-#--- linker.t
+#--- out-of-adr-range.t
SECTIONS {
.rodata 0x1000: { *(.rodata) }
.text 0x200100: { *(.text) }
}
+## This linker script ensures that .rodata and .text are sufficiently (<1M)
+## close to each other so that the adrp + ldr pair can be relaxed to nop + adr.
+#--- within-adr-range.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x2000: { *(.text) }
+}
+
## This linker script ensures that .rodata and .text are sufficiently (>4GB)
## far apart so that the adrp + ldr pair cannot be relaxed.
#--- out-of-range.t
diff --git a/lld/test/ELF/aarch64-copy.s b/lld/test/ELF/aarch64-copy.s
index 591186f1470f0..0d20f00323aa5 100644
--- a/lld/test/ELF/aarch64-copy.s
+++ b/lld/test/ELF/aarch64-copy.s
@@ -2,7 +2,7 @@
// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t.o
// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %p/Inputs/relocation-copy.s -o %t2.o
// RUN: ld.lld -shared %t2.o -soname fixed-length-string.so -o %t2.so
-// RUN: ld.lld %t.o %t2.so -o %t
+// RUN: ld.lld --no-relax %t.o %t2.so -o %t
// RUN: llvm-readobj -S -r --symbols %t | FileCheck %s
// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CODE %s
// RUN: llvm-objdump -s --section=.rodata %t | FileCheck --check-prefix=RODATA %s
diff --git a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
index 284d9a8d7edce..930709badcd11 100644
--- a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
+++ b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
@@ -1,11 +1,11 @@
# REQUIRES: aarch64
# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
-# RUN: ld.lld %t.o -o %t
+# RUN: ld.lld --no-relax %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PDE
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PDE-RELOC
-# RUN: ld.lld -pie %t.o -o %t
+# RUN: ld.lld -pie --no-relax %t.o -o %t
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PIE
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PIE-RELOC
More information about the llvm-commits
mailing list