[PATCH] D39744: [LLD][ELF][AArch64] Add support for AArch64 range extension thunks.

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 28 10:24:26 PST 2017


LGTM

Peter Smith via Phabricator <reviews at reviews.llvm.org> writes:

> peter.smith updated this revision to Diff 124540.
> peter.smith added a comment.
>
> Updated diff to use ADRP for position independent code. This thunk has a maximum range of +/- 4 Gigabytes, which is not the whole address space. However, the default small code model only supports programs up to 4 Gigabytes in size, and the large code model is not currently implemented in gcc and clang for position independent code, so it is safe to use for position independent thunks.
>
> I'll commit tomorrow if there are no further comments.
>
>
> https://reviews.llvm.org/D39744
>
> Files:
>   ELF/Arch/AArch64.cpp
>   ELF/Thunks.cpp
>   test/ELF/aarch64-call26-error.s
>   test/ELF/aarch64-call26-thunk.s
>   test/ELF/aarch64-jump26-error.s
>   test/ELF/aarch64-jump26-thunk.s
>   test/ELF/aarch64-thunk-pi.s
>   test/ELF/aarch64-thunk-script.s
>   test/ELF/aarch64-thunk-section-location.s
>
> Index: test/ELF/aarch64-thunk-section-location.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-thunk-section-location.s
> @@ -0,0 +1,41 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t
> +// RUN: ld.lld %t -o %t2 2>&1
> +// RUN: llvm-objdump -d  -start-address=134086664 -stop-address=134086676 -triple=aarch64-linux-gnu %t2 | FileCheck %s
> +
> +// Check that the range extension thunks are dumped close to the aarch64 branch
> +// range of 128 MiB
> + .section .text.1, "ax", %progbits
> + .balign 0x1000
> + .globl _start
> +_start:
> + bl high_target
> + ret
> +
> + .section .text.2, "ax", %progbits
> + .space 0x2000000
> +
> + .section .text.2, "ax", %progbits
> + .space 0x2000000
> +
> + .section .text.3, "ax", %progbits
> + .space 0x2000000
> +
> + .section .text.4, "ax", %progbits
> + .space 0x2000000 - 0x40000
> +
> + .section .text.5, "ax", %progbits
> + .space 0x40000
> +
> + .section .text.6, "ax", %progbits
> + .balign 0x1000
> +
> + .globl high_target
> + .type high_target, %function
> +high_target:
> + ret
> +
> +// CHECK: __AArch64AbsLongThunk_high_target:
> +// CHECK-NEXT:  7fe0008:        50 00 00 58     ldr     x16, #8
> +// CHECK-NEXT:  7fe000c:        00 02 1f d6     br      x16
> +// CHECK: $d:
> +// CHECK-NEXT:  7fe0010:        00 10 02 08     .word   0x08021000
> Index: test/ELF/aarch64-thunk-script.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-thunk-script.s
> @@ -0,0 +1,41 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t
> +// RUN: echo "SECTIONS { \
> +// RUN:       .text_low 0x2000: { *(.text_low) } \
> +// RUN:       .text_high 0x8002000 : { *(.text_high) } \
> +// RUN:       } " > %t.script
> +// RUN: ld.lld --script %t.script %t -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +// Check that we have the out of branch range calculation right. The immediate
> +// field is signed so we have a slightly higher negative displacement.
> + .section .text_low, "ax", %progbits
> + .globl _start
> + .type _start, %function
> +_start:
> + // Need thunk to high_target
> + bl high_target
> + ret
> +
> + .section .text_high, "ax", %progbits
> + .globl high_target
> + .type high_target, %function
> +high_target:
> + // No Thunk needed as we are within signed immediate range
> + bl _start
> + ret
> +
> +// CHECK: Disassembly of section .text_low:
> +// CHECK-NEXT: _start:
> +// CHECK-NEXT:     2000:       02 00 00 94     bl      #8
> +// CHECK-NEXT:     2004:       c0 03 5f d6     ret
> +// CHECK: __AArch64AbsLongThunk_high_target:
> +// CHECK-NEXT:     2008:       50 00 00 58     ldr     x16, #8
> +// CHECK-NEXT:     200c:       00 02 1f d6     br      x16
> +// CHECK: $d:
> +// CHECK-NEXT:     2010:       00 20 00 08     .word   0x08002000
> +// CHECK-NEXT:     2014:       00 00 00 00     .word   0x00000000
> +// CHECK: Disassembly of section .text_high:
> +// CHECK-NEXT: high_target:
> +// CHECK-NEXT:  8002000:       00 00 00 96     bl      #-134217728
> +// CHECK-NEXT:  8002004:       c0 03 5f d6     ret
> Index: test/ELF/aarch64-thunk-pi.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-thunk-pi.s
> @@ -0,0 +1,91 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t
> +// RUN: echo "SECTIONS { \
> +// RUN:       .text_low : { *(.text_low) } \
> +// RUN:       .text_high 0x10000000 : { *(.text_high) } \
> +// RUN:       } " > %t.script
> +// RUN: ld.lld --script %t.script --shared %t -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +// Check that Position Independent thunks are generated for shared libraries.
> + .section .text_low, "ax", %progbits
> + .globl low_target
> + .type low_target, %function
> +low_target:
> + // Need thunk to high_target at plt
> + bl high_target
> + ret
> +// CHECK: low_target:
> +// CHECK-NEXT:        0:        04 00 00 94     bl      #16
> +// CHECK-NEXT:        4:        c0 03 5f d6     ret
> +
> + .hidden low_target2
> + .globl low_target2
> + .type low_target2, %function
> +low_target2:
> + // Need thunk to high_target2
> + bl high_target2
> + ret
> +// CHECK: low_target2:
> +// CHECK-NEXT:        8:        05 00 00 94     bl      #20
> +// CHECK-NEXT:        c:        c0 03 5f d6     ret
> +
> +// Expect range extension thunks for .text_low
> +// adrp calculation is (PC + signed immediate) & (~0xfff)
> +// CHECK: __AArch64ADRPThunk_high_target:
> +// CHECK-NEXT:       10:       10 00 08 90     adrp    x16, #268435456
> +// CHECK-NEXT:       14:       10 82 04 91     add     x16, x16, #288
> +// CHECK-NEXT:       18:       00 02 1f d6     br      x16
> +// CHECK: __AArch64ADRPThunk_high_target2:
> +// CHECK-NEXT:       1c:       10 00 08 90     adrp    x16, #268435456
> +// CHECK-NEXT:       20:       10 22 00 91     add     x16, x16, #8
> +// CHECK-NEXT:       24:       00 02 1f d6     br      x16
> +
> +
> + .section .text_high, "ax", %progbits
> + .globl high_target
> + .type high_target, %function
> +high_target:
> + // No thunk needed as we can reach low_target at plt
> + bl low_target
> + ret
> +// CHECK: high_target:
> +// CHECK-NEXT: 10000000:        4c 00 00 94     bl      #304
> +// CHECK-NEXT: 10000004:        c0 03 5f d6     ret
> +
> + .hidden high_target2
> + .globl high_target2
> + .type high_target2, %function
> +high_target2:
> + // Need thunk to low_target2
> + bl low_target2
> + ret
> +// CHECK: high_target2:
> +// CHECK-NEXT: 10000008:        02 00 00 94     bl      #8
> +// CHECK-NEXT: 1000000c:        c0 03 5f d6     ret
> +
> +// Expect Thunk for .text_high
> +
> +// CHECK: __AArch64ADRPThunk_low_target2:
> +// CHECK-NEXT: 10000010:	10 00 f8 90 	adrp	x16, #-268435456
> +// CHECK-NEXT: 10000014:	10 22 00 91 	add	x16, x16, #8
> +// CHECK-NEXT: 10000018:	00 02 1f d6 	br	x16
> +
> +// CHECK: Disassembly of section .plt:
> +// CHECK-NEXT: .plt:
> +// CHECK-NEXT: 10000100:        f0 7b bf a9     stp     x16, x30, [sp, #-16]!
> +// CHECK-NEXT: 10000104:        10 00 00 90     adrp    x16, #0
> +// CHECK-NEXT: 10000108:        11 aa 40 f9     ldr     x17, [x16, #336]
> +// CHECK-NEXT: 1000010c:        10 42 05 91     add     x16, x16, #336
> +// CHECK-NEXT: 10000110:        20 02 1f d6     br      x17
> +// CHECK-NEXT: 10000114:        1f 20 03 d5     nop
> +// CHECK-NEXT: 10000118:        1f 20 03 d5     nop
> +// CHECK-NEXT: 1000011c:        1f 20 03 d5     nop
> +// CHECK-NEXT: 10000120:        10 00 00 90     adrp    x16, #0
> +// CHECK-NEXT: 10000124:        11 ae 40 f9     ldr     x17, [x16, #344]
> +// CHECK-NEXT: 10000128:        10 62 05 91     add     x16, x16, #344
> +// CHECK-NEXT: 1000012c:        20 02 1f d6     br      x17
> +// CHECK-NEXT: 10000130:        10 00 00 90     adrp    x16, #0
> +// CHECK-NEXT: 10000134:        11 b2 40 f9     ldr     x17, [x16, #352]
> +// CHECK-NEXT: 10000138:        10 82 05 91     add     x16, x16, #352
> +// CHECK-NEXT: 1000013c:        20 02 1f d6     br      x17
> Index: test/ELF/aarch64-jump26-thunk.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-jump26-thunk.s
> @@ -0,0 +1,20 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> +// RUN: ld.lld %t %tabs -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-pc-freebsd %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +.text
> +.globl _start
> +_start:
> +    b big
> +
> +// CHECK: Disassembly of section .text:
> +// CHECK-NEXT: _start:
> +// CHECK-NEXT:    20000:        02 00 00 14     b       #8
> +// CHECK: __AArch64AbsLongThunk_big:
> +// CHECK-NEXT:    20008:        50 00 00 58     ldr     x16, #8
> +// CHECK-NEXT:    2000c:        00 02 1f d6     br      x16
> +// CHECK: $d:
> +// CHECK-NEXT:    20010:        00 00 00 00     .word   0x00000000
> +// CHECK-NEXT:    20014:        10 00 00 00     .word   0x00000010
> Index: test/ELF/aarch64-jump26-error.s
> ===================================================================
> --- test/ELF/aarch64-jump26-error.s
> +++ /dev/null
> @@ -1,11 +0,0 @@
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> -// RUN: not ld.lld %t %tabs -o %t2 2>&1 | FileCheck %s
> -// REQUIRES: aarch64
> -
> -.text
> -.globl _start
> -_start:
> -    b big
> -
> -// CHECK: R_AARCH64_JUMP26 out of range
> Index: test/ELF/aarch64-call26-thunk.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-call26-thunk.s
> @@ -0,0 +1,21 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> +// RUN: ld.lld %t %tabs -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-pc-freebsd %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +.text
> +.globl _start
> +_start:
> +    bl big
> +
> +// CHECK: Disassembly of section .text:
> +// CHECK-NEXT: _start:
> +// CHECK-NEXT:    20000:        02 00 00 94     bl      #8
> +// CHECK: __AArch64AbsLongThunk_big:
> +// CHECK-NEXT:    20008:        50 00 00 58     ldr     x16, #8
> +// CHECK-NEXT:    2000c:        00 02 1f d6     br      x16
> +// CHECK: $d:
> +// CHECK-NEXT:    20010:        00 00 00 00     .word   0x00000000
> +// CHECK-NEXT:    20014:        10 00 00 00     .word   0x00000010
> +
> Index: test/ELF/aarch64-call26-error.s
> ===================================================================
> --- test/ELF/aarch64-call26-error.s
> +++ /dev/null
> @@ -1,11 +0,0 @@
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> -// RUN: not ld.lld %t %tabs -o %t2 2>&1 | FileCheck %s
> -// REQUIRES: aarch64
> -
> -.text
> -.globl _start
> -_start:
> -    bl big
> -
> -// CHECK: R_AARCH64_CALL26 out of range
> Index: ELF/Thunks.cpp
> ===================================================================
> --- ELF/Thunks.cpp
> +++ ELF/Thunks.cpp
> @@ -48,6 +48,23 @@
>  
>  namespace {
>  
> +// AArch64 long range Thunks
> +class AArch64ABSLongThunk final : public Thunk {
> +public:
> +  AArch64ABSLongThunk(Symbol &Dest) : Thunk(Dest) {}
> +  uint32_t size() const override { return 16; }
> +  void writeTo(uint8_t *Buf, ThunkSection &IS) const override;
> +  void addSymbols(ThunkSection &IS) override;
> +};
> +
> +class AArch64ADRPThunk final : public Thunk {
> +public:
> +  AArch64ADRPThunk(Symbol &Dest) : Thunk(Dest) {}
> +  uint32_t size() const override { return 12; }
> +  void writeTo(uint8_t *Buf, ThunkSection &IS) const override;
> +  void addSymbols(ThunkSection &IS) override;
> +};
> +
>  // Specific ARM Thunk implementations. The naming convention is:
>  // Source State, TargetState, Target Requirement, ABS or PI, Range
>  class ARMV7ABSLongThunk final : public Thunk {
> @@ -125,6 +142,60 @@
>  
>  } // end anonymous namespace
>  
> +// AArch64 long range Thunks
> +
> +static uint64_t getAArch64ThunkDestVA(const Symbol &S) {
> +  uint64_t V = S.isInPlt() ? S.getPltVA() : S.getVA();
> +  return V;
> +}
> +
> +void AArch64ABSLongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
> +  const uint8_t Data[] = {
> +    0x50, 0x00, 0x00, 0x58, //     ldr x16, L0
> +    0x00, 0x02, 0x1f, 0xd6, //     br  x16
> +    0x00, 0x00, 0x00, 0x00, // L0: .xword S
> +    0x00, 0x00, 0x00, 0x00,
> +  };
> +  uint64_t S = getAArch64ThunkDestVA(Destination);
> +  memcpy(Buf, Data, sizeof(Data));
> +  Target->relocateOne(Buf + 8, R_AARCH64_ABS64, S);
> +}
> +
> +void AArch64ABSLongThunk::addSymbols(ThunkSection &IS) {
> +  ThunkSym = addSyntheticLocal(
> +      Saver.save("__AArch64AbsLongThunk_" + Destination.getName()), STT_FUNC,
> +      Offset, size(), &IS);
> +  addSyntheticLocal("$x", STT_NOTYPE, Offset, 0, &IS);
> +  addSyntheticLocal("$d", STT_NOTYPE, Offset + 8, 0, &IS);
> +}
> +
> +// This Thunk has a maximum range of +/- 4 GiB, which is sufficient for all
> +// programs using the small code model, including pc-relative ones. At the time
> +// of writing, clang and gcc do not support the large code model for position
> +// independent code, so it is safe to use this for position independent thunks
> +// without worrying about the destination being more than 4 GiB away.
> +void AArch64ADRPThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
> +  const uint8_t Data[] = {
> +      0x10, 0x00, 0x00, 0x90, // adrp x16, Dest R_AARCH64_ADR_PREL_PG_HI21(Dest)
> +      0x10, 0x02, 0x00, 0x91, // add  x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest)
> +      0x00, 0x02, 0x1f, 0xd6, // br   x16
> +  };
> +  uint64_t S = getAArch64ThunkDestVA(Destination);
> +  uint64_t P = ThunkSym->getVA();
> +  memcpy(Buf, Data, sizeof(Data));
> +  Target->relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21,
> +                      getAArch64Page(S) - getAArch64Page(P));
> +  Target->relocateOne(Buf + 4, R_AARCH64_ADD_ABS_LO12_NC, S);
> +}
> +
> +void AArch64ADRPThunk::addSymbols(ThunkSection &IS)
> +{
> +  ThunkSym = addSyntheticLocal(
> +      Saver.save("__AArch64ADRPThunk_" + Destination.getName()), STT_FUNC,
> +      Offset, size(), &IS);
> +  addSyntheticLocal("$x", STT_NOTYPE, Offset, 0, &IS);
> +}
> +
>  // ARM Target Thunks
>  static uint64_t getARMThunkDestVA(const Symbol &S) {
>    uint64_t V = S.isInPlt() ? S.getPltVA() : S.getVA();
> @@ -309,6 +380,14 @@
>  
>  Thunk::~Thunk() = default;
>  
> +static Thunk *addThunkAArch64(RelType Type, Symbol &S) {
> +  if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
> +    fatal("unrecognized relocation type");
> +  if (Config->Pic)
> +    return make<AArch64ADRPThunk>(S);
> +  return make<AArch64ABSLongThunk>(S);
> +}
> +
>  // Creates a thunk for Thumb-ARM interworking.
>  static Thunk *addThunkArm(RelType Reloc, Symbol &S) {
>    // ARM relocations need ARM to Thumb interworking Thunks.
> @@ -341,7 +420,9 @@
>  }
>  
>  Thunk *addThunk(RelType Type, Symbol &S) {
> -  if (Config->EMachine == EM_ARM)
> +  if (Config->EMachine == EM_AARCH64)
> +    return addThunkAArch64(Type, S);
> +  else if (Config->EMachine == EM_ARM)
>      return addThunkArm(Type, S);
>    else if (Config->EMachine == EM_MIPS)
>      return addThunkMips(Type, S);
> Index: ELF/Arch/AArch64.cpp
> ===================================================================
> --- ELF/Arch/AArch64.cpp
> +++ ELF/Arch/AArch64.cpp
> @@ -39,6 +39,9 @@
>    void writePltHeader(uint8_t *Buf) const override;
>    void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
>                  int32_t Index, unsigned RelOff) const override;
> +  bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
> +                  uint64_t BranchAddr, const Symbol &S) const override;
> +  bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
>    bool usesOnlyLowPageBits(RelType Type) const override;
>    void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
>    RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
> @@ -66,6 +69,12 @@
>    // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant
>    // 1 of the tls structures and the tcb size is 16.
>    TcbSize = 16;
> +  NeedsThunks = true;
> +
> +  // See comment in Arch/ARM.cpp for a more detailed explanation of
> +  // ThunkSectionSpacing. For AArch64 the only branches we are permitted to
> +  // Thunk have a range of +/- 128 MiB
> +  ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000;
>  }
>  
>  RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
> @@ -181,6 +190,31 @@
>    relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr);
>  }
>  
> +bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
> +                         uint64_t BranchAddr, const Symbol &S) const {
> +  // ELF for the ARM 64-bit architecture, section Call and Jump relocations
> +  // only permits range extension thunks for R_AARCH64_CALL26 and
> +  // R_AARCH64_JUMP26 relocation types.
> +  if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
> +    return false;
> +  uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
> +  return !inBranchRange(Type, BranchAddr, Dst);
> +}
> +
> +bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
> +  if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
> +    return true;
> +  // The AArch64 call and unconditional branch instructions have a range of
> +  // +/- 128 MiB.
> +  uint64_t Range = 128 * 1024 * 1024;
> +  if (Dst > Src) {
> +    // Immediate of branch is signed.
> +    Range -= 4;
> +    return Dst - Src <= Range;
> +  }
> +  return Src - Dst <= Range;
> +}
> +
>  static void write32AArch64Addr(uint8_t *L, uint64_t Imm) {
>    uint32_t ImmLo = (Imm & 0x3) << 29;
>    uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;


More information about the llvm-commits mailing list