[lld] 4450a2a - [lld][ELF] Add support for ADRP+ADD optimization for AArch64

Alexander Shaposhnikov via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 1 22:18:13 PST 2022


Author: Alexander Shaposhnikov
Date: 2022-02-02T06:09:55Z
New Revision: 4450a2a23df0e7081ca7fee3ec641774afedc2bc

URL: https://github.com/llvm/llvm-project/commit/4450a2a23df0e7081ca7fee3ec641774afedc2bc
DIFF: https://github.com/llvm/llvm-project/commit/4450a2a23df0e7081ca7fee3ec641774afedc2bc.diff

LOG: [lld][ELF] Add support for ADRP+ADD optimization for AArch64

This diff adds support for ADRP+ADD optimization for AArch64 described in
https://github.com/ARM-software/abi-aa/commit/d2ca58c54b8e955cfef25c71822f837ae0439d73
i.e. under appropriate constraints

ADRP  x0, symbol
ADD   x0, x0, :lo12: symbol

can be turned into

NOP
ADR   x0, symbol

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D117614

Added: 
    lld/test/ELF/aarch64-adrp-add.s

Modified: 
    lld/ELF/Arch/AArch64.cpp
    lld/ELF/InputSection.cpp
    lld/ELF/Target.h
    lld/test/ELF/aarch64-adrp-ldr-got.s
    lld/test/ELF/aarch64-copy.s
    lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s

Removed: 
    


################################################################################
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 784d578312d79..5789bc935b638 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -591,6 +591,55 @@ AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
   safeToRelaxAdrpLdr = i == size;
 }
 
+bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
+                                     const Relocation &addRel, uint64_t secAddr,
+                                     uint8_t *buf) const {
+  // When the address of sym is within the range of ADR then
+  // we may relax
+  // ADRP xn, sym
+  // ADD  xn, xn, :lo12: sym
+  // to
+  // NOP
+  // ADR xn, sym
+  if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
+      addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
+    return false;
+  // Check if the relocations apply to consecutive instructions.
+  if (adrpRel.offset + 4 != addRel.offset)
+    return false;
+  if (adrpRel.sym != addRel.sym)
+    return false;
+  if (adrpRel.addend != 0 || addRel.addend != 0)
+    return false;
+
+  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
+  uint32_t addInstr = read32le(buf + addRel.offset);
+  // Check if the first instruction is ADRP and the second instruction is ADD.
+  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
+      (addInstr & 0xffc00000) != 0x91000000)
+    return false;
+  uint32_t adrpDestReg = adrpInstr & 0x1f;
+  uint32_t addDestReg = addInstr & 0x1f;
+  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
+  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
+    return false;
+
+  Symbol &sym = *adrpRel.sym;
+  // Check if the address difference is within 1MiB range.
+  int64_t val = sym.getVA() - (secAddr + addRel.offset);
+  if (val < -1024 * 1024 || val >= 1024 * 1024)
+    return false;
+
+  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
+                       /*addend=*/0, &sym};
+  // nop
+  write32le(buf + adrpRel.offset, 0xd503201f);
+  // adr x_<dest_reg>
+  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
+  target->relocate(buf + adrRel.offset, adrRel, val);
+  return true;
+}
+
 bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                      const Relocation &ldrRel, uint64_t secAddr,
                                      uint8_t *buf) const {
@@ -657,6 +706,7 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     getAArch64Page(secAddr + adrpSymRel.offset),
                                 64));
   target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
+  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
   return true;
 }
 

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 4e5b0f6859227..4b047f75ad69c 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1025,6 +1025,14 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
       }
       target.relocate(bufLoc, rel, targetVA);
       break;
+    case R_AARCH64_PAGE_PC:
+      if (i + 1 < size && aarch64relaxer.tryRelaxAdrpAdd(
+                              rel, relocations[i + 1], secAddr, buf)) {
+        ++i;
+        continue;
+      }
+      target.relocate(bufLoc, rel, targetVA);
+      break;
     case R_PPC64_RELAX_GOT_PC: {
       // The R_PPC64_PCREL_OPT relocation must appear immediately after
       // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.

diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 15330b2be258b..e002114f84394 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -227,6 +227,8 @@ class AArch64Relaxer {
 public:
   explicit AArch64Relaxer(ArrayRef<Relocation> relocs);
 
+  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
+                       uint64_t secAddr, uint8_t *buf) const;
   bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                        uint64_t secAddr, uint8_t *buf) const;
 };

diff --git a/lld/test/ELF/aarch64-adrp-add.s b/lld/test/ELF/aarch64-adrp-add.s
new file mode 100644
index 0000000000000..3b3eb18f04cb5
--- /dev/null
+++ b/lld/test/ELF/aarch64-adrp-add.s
@@ -0,0 +1,107 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t && split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-low.t -o %t/a-low
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=OUT-OF-RANGE
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range-high.t -o %t/a-high
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=OUT-OF-RANGE
+
+# OUT-OF-RANGE:      adrp  x30
+# OUT-OF-RANGE-NEXT: add   x30, x30
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t -o %t/a-low
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-low | FileCheck %s --check-prefix=IN-RANGE-LOW
+
+# IN-RANGE-LOW:      nop
+# IN-RANGE-LOW-NEXT: adr   x30
+# IN-RANGE-LOW-NEXT: adrp  x1
+# IN-RANGE-LOW-NEXT: add   x1
+# IN-RANGE-LOW-NEXT: adrp  x15
+# IN-RANGE-LOW-NEXT: add   x15
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-LOW-NEXT: adrp  x2
+# IN-RANGE-LOW-NEXT: add   x3, x2
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-LOW-NEXT: adrp  x2
+# IN-RANGE-LOW-NEXT: add   x2, x3
+
+# RUN: ld.lld %t/a.o -T %t/within-adr-range-high.t -o %t/a-high
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a-high | FileCheck %s --check-prefix=IN-RANGE-HIGH
+
+# IN-RANGE-HIGH:      nop
+# IN-RANGE-HIGH-NEXT: adr   x30
+# IN-RANGE-HIGH-NEXT: nop
+# IN-RANGE-HIGH-NEXT: adr   x1
+# IN-RANGE-HIGH-NEXT: nop
+# IN-RANGE-HIGH-NEXT: adr   x15
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-HIGH-NEXT: adrp  x2
+# IN-RANGE-HIGH-NEXT: add   x3, x2
+
+## ADRP and ADD use different registers, no relaxations should be applied.
+# IN-RANGE-HIGH-NEXT: adrp  x2
+# IN-RANGE-HIGH-NEXT: add   x2, x3
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld %t/a.o -T %t/within-adr-range-low.t --no-relax -o %t/a
+## --no-relax disables relaxations.
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s --check-prefix=OUT-OF-RANGE
+
+## .rodata and .text are close to each other,
+## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
+## is equal to the lowest allowed value.
+#--- within-adr-range-low.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text   0x100ffc: { *(.text) }
+}
+
+## .rodata and .text are far apart,
+## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
+## is equal to the lowest allowed value minus one.
+#--- out-of-adr-range-low.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text   0x100ffd: { *(.text) }
+}
+
+## .rodata and .text are close to each other,
+## the adrp + add pair can be relaxed to nop + adr, moreover, the address difference
+## is equal to the highest allowed value.
+#--- within-adr-range-high.t
+SECTIONS {
+ .text   0x1000: { *(.text) }
+ .rodata 0x101003: { *(.rodata) }
+}
+
+## .rodata and .text are far apart,
+## the adrp + add pair cannot be relaxed to nop + adr, moreover, the address difference
+## is equal to the highest allowed value plus one.
+#--- out-of-adr-range-high.t
+SECTIONS {
+ .text   0x1000: { *(.text) }
+ .rodata 0x101004: { *(.rodata) }
+}
+
+#--- a.s
+.rodata
+x:
+.word 10
+.text
+.global _start
+_start:
+  adrp    x30, x
+  add     x30, x30, :lo12:x
+  adrp    x1, x
+  add     x1, x1, :lo12:x
+  adrp    x15, x
+  add     x15, x15, :lo12:x
+  adrp    x2, x
+  add     x3, x2, :lo12:x
+  adrp    x2, x
+  add     x2, x3, :lo12:x

diff --git a/lld/test/ELF/aarch64-adrp-ldr-got.s b/lld/test/ELF/aarch64-adrp-ldr-got.s
index f085f31290db5..56a90aac3876c 100644
--- a/lld/test/ELF/aarch64-adrp-ldr-got.s
+++ b/lld/test/ELF/aarch64-adrp-ldr-got.s
@@ -5,7 +5,7 @@
 # RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
 # RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
 
-# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t -o %t/a
 # RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
 
 ## Symbol 'x' is nonpreemptible, the relaxation should be applied.
@@ -29,8 +29,15 @@
 # CHECK-NEXT: adrp   x6
 # CHECK-NEXT: ldr
 
+# RUN: ld.lld %t/a.o -T %t/within-adr-range.t -o %t/a
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck --check-prefix=ADR %s
+
+## Symbol 'x' is nonpreemptible, the relaxation should be applied.
+# ADR:        nop
+# ADR-NEXT:   adr    x1
+
 ## Symbol 'x' is nonpreemptible, but --no-relax surpresses relaxations.
-# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
+# RUN: ld.lld %t/a.o -T %t/out-of-adr-range.t --no-relax -o %t/no-relax
 # RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
 # RUN:   FileCheck --check-prefix=X1-NO-RELAX %s
 
@@ -61,12 +68,20 @@
 
 ## This linker script ensures that .rodata and .text are sufficiently (>1M)
 ## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
-#--- linker.t
+#--- out-of-adr-range.t
 SECTIONS {
  .rodata 0x1000: { *(.rodata) }
  .text   0x200100: { *(.text) }
 }
 
+## This linker script ensures that .rodata and .text are sufficiently (<1M)
+## close to each other so that the adrp + ldr pair can be relaxed to nop + adr.
+#--- within-adr-range.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text   0x2000: { *(.text) }
+}
+
 ## This linker script ensures that .rodata and .text are sufficiently (>4GB)
 ## far apart so that the adrp + ldr pair cannot be relaxed.
 #--- out-of-range.t

diff --git a/lld/test/ELF/aarch64-copy.s b/lld/test/ELF/aarch64-copy.s
index 591186f1470f0..0d20f00323aa5 100644
--- a/lld/test/ELF/aarch64-copy.s
+++ b/lld/test/ELF/aarch64-copy.s
@@ -2,7 +2,7 @@
 // RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t.o
 // RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %p/Inputs/relocation-copy.s -o %t2.o
 // RUN: ld.lld -shared %t2.o -soname fixed-length-string.so -o %t2.so
-// RUN: ld.lld %t.o %t2.so -o %t
+// RUN: ld.lld --no-relax %t.o %t2.so -o %t
 // RUN: llvm-readobj -S -r --symbols %t | FileCheck %s
 // RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CODE %s
 // RUN: llvm-objdump -s --section=.rodata %t | FileCheck --check-prefix=RODATA %s

diff --git a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
index 284d9a8d7edce..930709badcd11 100644
--- a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
+++ b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
@@ -1,11 +1,11 @@
 # REQUIRES: aarch64
 # RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
 
-# RUN: ld.lld %t.o -o %t
+# RUN: ld.lld --no-relax %t.o -o %t
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PDE
 # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PDE-RELOC
 
-# RUN: ld.lld -pie %t.o -o %t
+# RUN: ld.lld -pie --no-relax %t.o -o %t
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PIE
 # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PIE-RELOC
 


        


More information about the llvm-commits mailing list