[lld] 8acc3b4 - [lld][ELF] Support adrp+ldr GOT optimization for AArch64

Alexander Shaposhnikov via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 9 21:22:00 PST 2022


Author: Alexander Shaposhnikov
Date: 2022-01-10T05:20:37Z
New Revision: 8acc3b4ab0c76b9c2a54182e31a02f90ebb96329

URL: https://github.com/llvm/llvm-project/commit/8acc3b4ab0c76b9c2a54182e31a02f90ebb96329
DIFF: https://github.com/llvm/llvm-project/commit/8acc3b4ab0c76b9c2a54182e31a02f90ebb96329.diff

LOG: [lld][ELF] Support adrp+ldr GOT optimization for AArch64

This diff adds first bits to support relocation relaxations for AArch64
discussed on https://github.com/ARM-software/abi-aa/pull/106.
In particular, the case of

adrp x0, :got: symbol
ldr x0, [x0, :got_lo12: symbol]

is handled.

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D112063

Added: 
    lld/test/ELF/aarch64-adrp-ldr-got-symbols.s
    lld/test/ELF/aarch64-adrp-ldr-got.s

Modified: 
    lld/ELF/Arch/AArch64.cpp
    lld/ELF/InputSection.cpp
    lld/ELF/Target.h

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index ca3a6aa58dc5b..96e16f760a82a 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -568,6 +568,98 @@ void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
   llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
 }
 
+AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
+  if (!config->relax || config->emachine != EM_AARCH64) {
+    safeToRelaxAdrpLdr = false;
+    return;
+  }
+  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
+  // always appear in pairs.
+  size_t i = 0;
+  const size_t size = relocs.size();
+  for (; i != size; ++i) {
+    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
+      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
+        ++i;
+        continue;
+      }
+      break;
+    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
+      break;
+    }
+  }
+  safeToRelaxAdrpLdr = i == size;
+}
+
+bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
+                                     const Relocation &ldrRel, uint64_t secAddr,
+                                     uint8_t *buf) const {
+  if (!safeToRelaxAdrpLdr)
+    return false;
+
+  // When the definition of sym is not preemptible then we may
+  // be able to relax
+  // ADRP xn, :got: sym
+  // LDR xn, [ xn :got_lo12: sym]
+  // to
+  // ADRP xn, sym
+  // ADD xn, xn, :lo_12: sym
+
+  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
+      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
+    return false;
+  // Check if the relocations apply to consecutive instructions.
+  if (adrpRel.offset + 4 != ldrRel.offset)
+    return false;
+  // Check if the relocations reference the same symbol and
+  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
+  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
+      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
+    return false;
+  // Check if the addends of the both instructions are zero.
+  if (adrpRel.addend != 0 || ldrRel.addend != 0)
+    return false;
+  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
+  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
+  // Check if the first instruction is ADRP and the second instruction is LDR.
+  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
+      (ldrInstr & 0x3b000000) != 0x39000000)
+    return false;
+  // Check the value of the sf bit.
+  if (!(ldrInstr >> 31))
+    return false;
+  uint32_t adrpDestReg = adrpInstr & 0x1f;
+  uint32_t ldrDestReg = ldrInstr & 0x1f;
+  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
+  // Check if ADPR and LDR use the same register.
+  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
+    return false;
+
+  Symbol &sym = *adrpRel.sym;
+  // Check if the address difference is within 4GB range.
+  int64_t val =
+      getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
+  if (val != llvm::SignExtend64(val, 33))
+    return false;
+
+  Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
+                           adrpRel.offset, /*addend=*/0, &sym};
+  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
+                       /*addend=*/0, &sym};
+
+  // adrp x_<dest_reg>
+  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
+  // add x_<dest reg>, x_<dest reg>
+  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));
+
+  target->relocate(buf + adrpSymRel.offset, adrpSymRel,
+                   SignExtend64(getAArch64Page(sym.getVA()) -
+                                    getAArch64Page(secAddr + adrpSymRel.offset),
+                                64));
+  target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
+  return true;
+}
+
 // AArch64 may use security features in variant PLT sequences. These are:
 // Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
 // Indicator (BTI) introduced in armv8.5-a. The additional instructions used

diff  --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 844388330d6fa..bd1079c9a1dbe 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1010,25 +1010,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
   const unsigned bits = config->wordsize * 8;
   const TargetInfo &target = *elf::target;
   uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
-
-  for (const Relocation &rel : relocations) {
+  AArch64Relaxer aarch64relaxer(relocations);
+  for (size_t i = 0, size = relocations.size(); i != size; ++i) {
+    const Relocation &rel = relocations[i];
     if (rel.expr == R_NONE)
       continue;
     uint64_t offset = rel.offset;
     uint8_t *bufLoc = buf + offset;
 
-    uint64_t addrLoc = getOutputSection()->addr + offset;
+    uint64_t secAddr = getOutputSection()->addr;
     if (auto *sec = dyn_cast<InputSection>(this))
-      addrLoc += sec->outSecOff;
+      secAddr += sec->outSecOff;
+    const uint64_t addrLoc = secAddr + offset;
     const uint64_t targetVA =
         SignExtend64(getRelocTargetVA(file, rel.type, rel.addend, addrLoc,
-                                      *rel.sym, rel.expr), bits);
-
+                                      *rel.sym, rel.expr),
+                     bits);
     switch (rel.expr) {
     case R_RELAX_GOT_PC:
     case R_RELAX_GOT_PC_NOPIC:
       target.relaxGot(bufLoc, rel, targetVA);
       break;
+    case R_AARCH64_GOT_PAGE_PC:
+      if (i + 1 < size && aarch64relaxer.tryRelaxAdrpLdr(
+                              rel, relocations[i + 1], secAddr, buf)) {
+        ++i;
+        continue;
+      }
+      target.relocate(bufLoc, rel, targetVA);
+      break;
     case R_PPC64_RELAX_GOT_PC: {
       // The R_PPC64_PCREL_OPT relocation must appear immediately after
       // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.

diff  --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index e0e97301ca98c..f7b947ec3aa28 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -221,6 +221,16 @@ void addPPC64SaveRestore();
 uint64_t getPPC64TocBase();
 uint64_t getAArch64Page(uint64_t expr);
 
+class AArch64Relaxer {
+  bool safeToRelaxAdrpLdr = true;
+
+public:
+  explicit AArch64Relaxer(ArrayRef<Relocation> relocs);
+
+  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
+                       uint64_t secAddr, uint8_t *buf) const;
+};
+
 extern const TargetInfo *target;
 TargetInfo *getTarget();
 

diff  --git a/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s b/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s
new file mode 100644
index 0000000000000..bff7c2fcbb71a
--- /dev/null
+++ b/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s
@@ -0,0 +1,70 @@
+## This test verifies that the pair adrp + ldr is relaxed/not relaxed
+## depending on the target symbol properties.
+
+# REQUIRES: aarch64
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/symbols.s -o %t/symbols.o
+
+# RUN: ld.lld -shared -T %t/linker.t %t/symbols.o -o %t/symbols.so
+# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols.so | \
+# RUN:   FileCheck --check-prefix=LIB %s
+
+## Symbol 'hidden_sym' is nonpreemptible, the relaxation should be applied.
+LIB:      adrp   x0
+LIB-NEXT: add    x0
+
+## Symbol 'global_sym' is preemptible, no relaxations should be applied.
+LIB-NEXT: adrp   x1
+LIB-NEXT: ldr    x1
+
+## Symbol 'undefined_sym' is undefined, no relaxations should be applied.
+LIB-NEXT: adrp   x2
+LIB-NEXT: ldr    x2
+
+## Symbol 'ifunc_sym' is STT_GNU_IFUNC, no relaxations should be applied.
+LIB-NEXT: adrp   x3
+LIB-NEXT: ldr    x3
+
+# RUN: ld.lld -T %t/linker.t -z undefs %t/symbols.o -o %t/symbols
+# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols | \
+# RUN:   FileCheck --check-prefix=EXE %s
+
+## Symbol 'global_sym' is nonpreemptible, the relaxation should be applied.
+EXE:      adrp   x1
+EXE-NEXT: add    x1
+
+## The linker script ensures that .rodata and .text are sufficiently (>1MB)
+## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
+#--- linker.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text   0x300100: { *(.text) }
+}
+
+#--- symbols.s
+.rodata
+.hidden hidden_sym
+hidden_sym:
+.word 10
+
+.global global_sym
+global_sym:
+.word 10
+
+.text
+.type ifunc_sym STT_GNU_IFUNC
+.hidden ifunc_sym
+ifunc_sym:
+  nop
+
+.global _start
+_start:
+  adrp    x0, :got:hidden_sym
+  ldr     x0, [x0, #:got_lo12:hidden_sym]
+  adrp    x1, :got:global_sym
+  ldr     x1, [x1, #:got_lo12:global_sym]
+  adrp    x2, :got:undefined_sym
+  ldr     x2, [x2, #:got_lo12:undefined_sym]
+  adrp    x3, :got:ifunc_sym
+  ldr     x3, [x3, #:got_lo12:ifunc_sym]

diff  --git a/lld/test/ELF/aarch64-adrp-ldr-got.s b/lld/test/ELF/aarch64-adrp-ldr-got.s
new file mode 100644
index 0000000000000..c789e720e02e7
--- /dev/null
+++ b/lld/test/ELF/aarch64-adrp-ldr-got.s
@@ -0,0 +1,117 @@
+# REQUIRES: aarch64
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
+
+# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
+
+## Symbol 'x' is nonpreemptible, the relaxation should be applied.
+## This test verifies the encoding when the register x1 is used.
+# CHECK:      adrp   x1
+# CHECK-NEXT: add    x1, x1
+
+## ADRP contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: adrp   x2
+# CHECK-NEXT: ldr
+
+## LDR contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: adrp   x3
+# CHECK-NEXT: ldr
+
+## LDR and ADRP use different registers, no relaxations should be applied.
+# CHECK-NEXT: adrp   x4
+# CHECK-NEXT: ldr
+
+## LDR and ADRP use different registers, no relaxations should be applied.
+# CHECK-NEXT: adrp   x6
+# CHECK-NEXT: ldr
+
+## Symbol 'x' is nonpreemptible, but --no-relax surpresses relaxations.
+# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
+# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
+#   FileCheck --check-prefix=X1-NO-RELAX %s
+
+# X1-NO-RELAX:      adrp   x1
+# X1-NO-RELAX-NEXT: ldr
+
+## Symbol 'x' is nonpreemptible, but the address is not within adrp range.
+# RUN: ld.lld %t/a.o -T %t/out-of-range.t -o %t/out-of-range
+# RUN: llvm-objdump --no-show-raw-insn -d %t/out-of-range | \
+# RUN:   FileCheck --check-prefix=X1-NO-RELAX %s
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld %t/unpaired.o -o %t/unpaired
+# RUN: llvm-objdump --no-show-raw-insn -d %t/unpaired | \
+# RUN:   FileCheck --check-prefix=UNPAIRED %s
+
+# UNPAIRED:         adrp   x0
+# UNPAIRED-NEXT:    b
+# UNPAIRED-NEXT:    adrp   x0
+# UNPAIRED:         ldr	   x0
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld %t/lone-ldr.o -o %t/lone-ldr
+# RUN: llvm-objdump --no-show-raw-insn -d %t/lone-ldr | \
+# RUN:   FileCheck --check-prefix=LONE-LDR %s
+
+# LONE-LDR:         ldr	   x0
+
+## This linker script ensures that .rodata and .text are sufficiently (>1M)
+## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
+#--- linker.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text   0x200100: { *(.text) }
+}
+
+## This linker script ensures that .rodata and .text are sufficiently (>4GB)
+## far apart so that the adrp + ldr pair cannot be relaxed.
+#--- out-of-range.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text   0x100002000: { *(.text) }
+}
+
+#--- a.s
+.rodata
+.hidden x
+x:
+.word 10
+.text
+.global _start
+_start:
+  adrp    x1, :got:x
+  ldr     x1, [x1, #:got_lo12:x]
+  adrp    x2, :got:x+1
+  ldr     x2, [x2, #:got_lo12:x]
+  adrp    x3, :got:x
+  ldr     x3, [x3, #:got_lo12:x+8]
+  adrp    x4, :got:x
+  ldr     x5, [x4, #:got_lo12:x]
+  adrp    x6, :got:x
+  ldr     x6, [x0, #:got_lo12:x]
+
+#--- unpaired.s
+.text
+.hidden x
+x:
+  nop
+.global _start
+_start:
+  adrp    x0, :got:x
+  b L
+  adrp    x0, :got:x
+L:
+  ldr     x0, [x0, #:got_lo12:x]
+
+#--- lone-ldr.s
+.text
+.hidden x
+x:
+  nop
+.global _start
+_start:
+  ldr     x0, [x0, #:got_lo12:x]


        


More information about the llvm-commits mailing list