[PATCH] D39744: [LLD][ELF][AArch64] Add support for AArch64 range extension thunks.
Rafael Avila de Espindola via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 10:24:26 PST 2017
LGTM
Peter Smith via Phabricator <reviews at reviews.llvm.org> writes:
> peter.smith updated this revision to Diff 124540.
> peter.smith added a comment.
>
> Updated diff to use ADRP for position-independent code. This thunk has a maximum range of +/- 4 GiB, which is not the whole address space. However, the default small code model only supports programs up to 4 GiB in size, and the large code model is not currently implemented in gcc and clang for position-independent code, so it is safe to use for position-independent thunks.
>
> I'll commit tomorrow if there are no further comments.
>
>
> https://reviews.llvm.org/D39744
>
> Files:
> ELF/Arch/AArch64.cpp
> ELF/Thunks.cpp
> test/ELF/aarch64-call26-error.s
> test/ELF/aarch64-call26-thunk.s
> test/ELF/aarch64-jump26-error.s
> test/ELF/aarch64-jump26-thunk.s
> test/ELF/aarch64-thunk-pi.s
> test/ELF/aarch64-thunk-script.s
> test/ELF/aarch64-thunk-section-location.s
>
> Index: test/ELF/aarch64-thunk-section-location.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-thunk-section-location.s
> @@ -0,0 +1,41 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t
> +// RUN: ld.lld %t -o %t2 2>&1
> +// RUN: llvm-objdump -d -start-address=134086664 -stop-address=134086676 -triple=aarch64-linux-gnu %t2 | FileCheck %s
> +
> +// Check that the range extension thunks are dumped close to the aarch64 branch
> +// range of 128 MiB
> + .section .text.1, "ax", %progbits
> + .balign 0x1000
> + .globl _start
> +_start:
> + bl high_target
> + ret
> +
> + .section .text.2, "ax", %progbits
> + .space 0x2000000
> +
> + .section .text.2, "ax", %progbits
> + .space 0x2000000
> +
> + .section .text.3, "ax", %progbits
> + .space 0x2000000
> +
> + .section .text.4, "ax", %progbits
> + .space 0x2000000 - 0x40000
> +
> + .section .text.5, "ax", %progbits
> + .space 0x40000
> +
> + .section .text.6, "ax", %progbits
> + .balign 0x1000
> +
> + .globl high_target
> + .type high_target, %function
> +high_target:
> + ret
> +
> +// CHECK: __AArch64AbsLongThunk_high_target:
> +// CHECK-NEXT: 7fe0008: 50 00 00 58 ldr x16, #8
> +// CHECK-NEXT: 7fe000c: 00 02 1f d6 br x16
> +// CHECK: $d:
> +// CHECK-NEXT: 7fe0010: 00 10 02 08 .word 0x08021000
> Index: test/ELF/aarch64-thunk-script.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-thunk-script.s
> @@ -0,0 +1,41 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t
> +// RUN: echo "SECTIONS { \
> +// RUN: .text_low 0x2000: { *(.text_low) } \
> +// RUN: .text_high 0x8002000 : { *(.text_high) } \
> +// RUN: } " > %t.script
> +// RUN: ld.lld --script %t.script %t -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +// Check that we have the out of branch range calculation right. The immediate
> +// field is signed so we have a slightly higher negative displacement.
> + .section .text_low, "ax", %progbits
> + .globl _start
> + .type _start, %function
> +_start:
> + // Need thunk to high_target
> + bl high_target
> + ret
> +
> + .section .text_high, "ax", %progbits
> + .globl high_target
> + .type high_target, %function
> +high_target:
> + // No Thunk needed as we are within signed immediate range
> + bl _start
> + ret
> +
> +// CHECK: Disassembly of section .text_low:
> +// CHECK-NEXT: _start:
> +// CHECK-NEXT: 2000: 02 00 00 94 bl #8
> +// CHECK-NEXT: 2004: c0 03 5f d6 ret
> +// CHECK: __AArch64AbsLongThunk_high_target:
> +// CHECK-NEXT: 2008: 50 00 00 58 ldr x16, #8
> +// CHECK-NEXT: 200c: 00 02 1f d6 br x16
> +// CHECK: $d:
> +// CHECK-NEXT: 2010: 00 20 00 08 .word 0x08002000
> +// CHECK-NEXT: 2014: 00 00 00 00 .word 0x00000000
> +// CHECK: Disassembly of section .text_high:
> +// CHECK-NEXT: high_target:
> +// CHECK-NEXT: 8002000: 00 00 00 96 bl #-134217728
> +// CHECK-NEXT: 8002004: c0 03 5f d6 ret
> Index: test/ELF/aarch64-thunk-pi.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-thunk-pi.s
> @@ -0,0 +1,91 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t
> +// RUN: echo "SECTIONS { \
> +// RUN: .text_low : { *(.text_low) } \
> +// RUN: .text_high 0x10000000 : { *(.text_high) } \
> +// RUN: } " > %t.script
> +// RUN: ld.lld --script %t.script --shared %t -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +// Check that Position Independent thunks are generated for shared libraries.
> + .section .text_low, "ax", %progbits
> + .globl low_target
> + .type low_target, %function
> +low_target:
> + // Need thunk to high_target at plt
> + bl high_target
> + ret
> +// CHECK: low_target:
> +// CHECK-NEXT: 0: 04 00 00 94 bl #16
> +// CHECK-NEXT: 4: c0 03 5f d6 ret
> +
> + .hidden low_target2
> + .globl low_target2
> + .type low_target2, %function
> +low_target2:
> + // Need thunk to high_target2
> + bl high_target2
> + ret
> +// CHECK: low_target2:
> +// CHECK-NEXT: 8: 05 00 00 94 bl #20
> +// CHECK-NEXT: c: c0 03 5f d6 ret
> +
> +// Expect range extension thunks for .text_low
> +// adrp calculation is (PC + signed immediate) & (~0xfff)
> +// CHECK: __AArch64ADRPThunk_high_target:
> +// CHECK-NEXT: 10: 10 00 08 90 adrp x16, #268435456
> +// CHECK-NEXT: 14: 10 82 04 91 add x16, x16, #288
> +// CHECK-NEXT: 18: 00 02 1f d6 br x16
> +// CHECK: __AArch64ADRPThunk_high_target2:
> +// CHECK-NEXT: 1c: 10 00 08 90 adrp x16, #268435456
> +// CHECK-NEXT: 20: 10 22 00 91 add x16, x16, #8
> +// CHECK-NEXT: 24: 00 02 1f d6 br x16
> +
> +
> + .section .text_high, "ax", %progbits
> + .globl high_target
> + .type high_target, %function
> +high_target:
> + // No thunk needed as we can reach low_target at plt
> + bl low_target
> + ret
> +// CHECK: high_target:
> +// CHECK-NEXT: 10000000: 4c 00 00 94 bl #304
> +// CHECK-NEXT: 10000004: c0 03 5f d6 ret
> +
> + .hidden high_target2
> + .globl high_target2
> + .type high_target2, %function
> +high_target2:
> + // Need thunk to low_target2
> + bl low_target2
> + ret
> +// CHECK: high_target2:
> +// CHECK-NEXT: 10000008: 02 00 00 94 bl #8
> +// CHECK-NEXT: 1000000c: c0 03 5f d6 ret
> +
> +// Expect Thunk for .text_high
> +
> +// CHECK: __AArch64ADRPThunk_low_target2:
> +// CHECK-NEXT: 10000010: 10 00 f8 90 adrp x16, #-268435456
> +// CHECK-NEXT: 10000014: 10 22 00 91 add x16, x16, #8
> +// CHECK-NEXT: 10000018: 00 02 1f d6 br x16
> +
> +// CHECK: Disassembly of section .plt:
> +// CHECK-NEXT: .plt:
> +// CHECK-NEXT: 10000100: f0 7b bf a9 stp x16, x30, [sp, #-16]!
> +// CHECK-NEXT: 10000104: 10 00 00 90 adrp x16, #0
> +// CHECK-NEXT: 10000108: 11 aa 40 f9 ldr x17, [x16, #336]
> +// CHECK-NEXT: 1000010c: 10 42 05 91 add x16, x16, #336
> +// CHECK-NEXT: 10000110: 20 02 1f d6 br x17
> +// CHECK-NEXT: 10000114: 1f 20 03 d5 nop
> +// CHECK-NEXT: 10000118: 1f 20 03 d5 nop
> +// CHECK-NEXT: 1000011c: 1f 20 03 d5 nop
> +// CHECK-NEXT: 10000120: 10 00 00 90 adrp x16, #0
> +// CHECK-NEXT: 10000124: 11 ae 40 f9 ldr x17, [x16, #344]
> +// CHECK-NEXT: 10000128: 10 62 05 91 add x16, x16, #344
> +// CHECK-NEXT: 1000012c: 20 02 1f d6 br x17
> +// CHECK-NEXT: 10000130: 10 00 00 90 adrp x16, #0
> +// CHECK-NEXT: 10000134: 11 b2 40 f9 ldr x17, [x16, #352]
> +// CHECK-NEXT: 10000138: 10 82 05 91 add x16, x16, #352
> +// CHECK-NEXT: 1000013c: 20 02 1f d6 br x17
> Index: test/ELF/aarch64-jump26-thunk.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-jump26-thunk.s
> @@ -0,0 +1,20 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> +// RUN: ld.lld %t %tabs -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-pc-freebsd %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +.text
> +.globl _start
> +_start:
> + b big
> +
> +// CHECK: Disassembly of section .text:
> +// CHECK-NEXT: _start:
> +// CHECK-NEXT: 20000: 02 00 00 14 b #8
> +// CHECK: __AArch64AbsLongThunk_big:
> +// CHECK-NEXT: 20008: 50 00 00 58 ldr x16, #8
> +// CHECK-NEXT: 2000c: 00 02 1f d6 br x16
> +// CHECK: $d:
> +// CHECK-NEXT: 20010: 00 00 00 00 .word 0x00000000
> +// CHECK-NEXT: 20014: 10 00 00 00 .word 0x00000010
> Index: test/ELF/aarch64-jump26-error.s
> ===================================================================
> --- test/ELF/aarch64-jump26-error.s
> +++ /dev/null
> @@ -1,11 +0,0 @@
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> -// RUN: not ld.lld %t %tabs -o %t2 2>&1 | FileCheck %s
> -// REQUIRES: aarch64
> -
> -.text
> -.globl _start
> -_start:
> - b big
> -
> -// CHECK: R_AARCH64_JUMP26 out of range
> Index: test/ELF/aarch64-call26-thunk.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/aarch64-call26-thunk.s
> @@ -0,0 +1,21 @@
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> +// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> +// RUN: ld.lld %t %tabs -o %t2 2>&1
> +// RUN: llvm-objdump -d -triple=aarch64-pc-freebsd %t2 | FileCheck %s
> +// REQUIRES: aarch64
> +
> +.text
> +.globl _start
> +_start:
> + bl big
> +
> +// CHECK: Disassembly of section .text:
> +// CHECK-NEXT: _start:
> +// CHECK-NEXT: 20000: 02 00 00 94 bl #8
> +// CHECK: __AArch64AbsLongThunk_big:
> +// CHECK-NEXT: 20008: 50 00 00 58 ldr x16, #8
> +// CHECK-NEXT: 2000c: 00 02 1f d6 br x16
> +// CHECK: $d:
> +// CHECK-NEXT: 20010: 00 00 00 00 .word 0x00000000
> +// CHECK-NEXT: 20014: 10 00 00 00 .word 0x00000010
> +
> Index: test/ELF/aarch64-call26-error.s
> ===================================================================
> --- test/ELF/aarch64-call26-error.s
> +++ /dev/null
> @@ -1,11 +0,0 @@
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %S/Inputs/abs.s -o %tabs
> -// RUN: llvm-mc -filetype=obj -triple=aarch64-pc-freebsd %s -o %t
> -// RUN: not ld.lld %t %tabs -o %t2 2>&1 | FileCheck %s
> -// REQUIRES: aarch64
> -
> -.text
> -.globl _start
> -_start:
> - bl big
> -
> -// CHECK: R_AARCH64_CALL26 out of range
> Index: ELF/Thunks.cpp
> ===================================================================
> --- ELF/Thunks.cpp
> +++ ELF/Thunks.cpp
> @@ -48,6 +48,23 @@
>
> namespace {
>
> +// AArch64 long range Thunks
> +class AArch64ABSLongThunk final : public Thunk {
> +public:
> + AArch64ABSLongThunk(Symbol &Dest) : Thunk(Dest) {}
> + uint32_t size() const override { return 16; }
> + void writeTo(uint8_t *Buf, ThunkSection &IS) const override;
> + void addSymbols(ThunkSection &IS) override;
> +};
> +
> +class AArch64ADRPThunk final : public Thunk {
> +public:
> + AArch64ADRPThunk(Symbol &Dest) : Thunk(Dest) {}
> + uint32_t size() const override { return 12; }
> + void writeTo(uint8_t *Buf, ThunkSection &IS) const override;
> + void addSymbols(ThunkSection &IS) override;
> +};
> +
> // Specific ARM Thunk implementations. The naming convention is:
> // Source State, TargetState, Target Requirement, ABS or PI, Range
> class ARMV7ABSLongThunk final : public Thunk {
> @@ -125,6 +142,60 @@
>
> } // end anonymous namespace
>
> +// AArch64 long range Thunks
> +
> +static uint64_t getAArch64ThunkDestVA(const Symbol &S) {
> + uint64_t V = S.isInPlt() ? S.getPltVA() : S.getVA();
> + return V;
> +}
> +
> +void AArch64ABSLongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
> + const uint8_t Data[] = {
> + 0x50, 0x00, 0x00, 0x58, // ldr x16, L0
> + 0x00, 0x02, 0x1f, 0xd6, // br x16
> + 0x00, 0x00, 0x00, 0x00, // L0: .xword S
> + 0x00, 0x00, 0x00, 0x00,
> + };
> + uint64_t S = getAArch64ThunkDestVA(Destination);
> + memcpy(Buf, Data, sizeof(Data));
> + Target->relocateOne(Buf + 8, R_AARCH64_ABS64, S);
> +}
> +
> +void AArch64ABSLongThunk::addSymbols(ThunkSection &IS) {
> + ThunkSym = addSyntheticLocal(
> + Saver.save("__AArch64AbsLongThunk_" + Destination.getName()), STT_FUNC,
> + Offset, size(), &IS);
> + addSyntheticLocal("$x", STT_NOTYPE, Offset, 0, &IS);
> + addSyntheticLocal("$d", STT_NOTYPE, Offset + 8, 0, &IS);
> +}
> +
> +// This Thunk has a maximum range of +/- 4 GiB, which is sufficient for all
> +// programs using the small code model, including pc-relative ones. At the time
> +// of writing, clang and gcc do not support the large code model for position
> +// independent code, so it is safe to use this for position independent thunks
> +// without worrying about the destination being more than 4 GiB away.
> +void AArch64ADRPThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
> + const uint8_t Data[] = {
> + 0x10, 0x00, 0x00, 0x90, // adrp x16, Dest R_AARCH64_ADR_PREL_PG_HI21(Dest)
> + 0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest)
> + 0x00, 0x02, 0x1f, 0xd6, // br x16
> + };
> + uint64_t S = getAArch64ThunkDestVA(Destination);
> + uint64_t P = ThunkSym->getVA();
> + memcpy(Buf, Data, sizeof(Data));
> + Target->relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21,
> + getAArch64Page(S) - getAArch64Page(P));
> + Target->relocateOne(Buf + 4, R_AARCH64_ADD_ABS_LO12_NC, S);
> +}
> +
> +void AArch64ADRPThunk::addSymbols(ThunkSection &IS)
> +{
> + ThunkSym = addSyntheticLocal(
> + Saver.save("__AArch64ADRPThunk_" + Destination.getName()), STT_FUNC,
> + Offset, size(), &IS);
> + addSyntheticLocal("$x", STT_NOTYPE, Offset, 0, &IS);
> +}
> +
> // ARM Target Thunks
> static uint64_t getARMThunkDestVA(const Symbol &S) {
> uint64_t V = S.isInPlt() ? S.getPltVA() : S.getVA();
> @@ -309,6 +380,14 @@
>
> Thunk::~Thunk() = default;
>
> +static Thunk *addThunkAArch64(RelType Type, Symbol &S) {
> + if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
> + fatal("unrecognized relocation type");
> + if (Config->Pic)
> + return make<AArch64ADRPThunk>(S);
> + return make<AArch64ABSLongThunk>(S);
> +}
> +
> // Creates a thunk for Thumb-ARM interworking.
> static Thunk *addThunkArm(RelType Reloc, Symbol &S) {
> // ARM relocations need ARM to Thumb interworking Thunks.
> @@ -341,7 +420,9 @@
> }
>
> Thunk *addThunk(RelType Type, Symbol &S) {
> - if (Config->EMachine == EM_ARM)
> + if (Config->EMachine == EM_AARCH64)
> + return addThunkAArch64(Type, S);
> + else if (Config->EMachine == EM_ARM)
> return addThunkArm(Type, S);
> else if (Config->EMachine == EM_MIPS)
> return addThunkMips(Type, S);
> Index: ELF/Arch/AArch64.cpp
> ===================================================================
> --- ELF/Arch/AArch64.cpp
> +++ ELF/Arch/AArch64.cpp
> @@ -39,6 +39,9 @@
> void writePltHeader(uint8_t *Buf) const override;
> void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
> int32_t Index, unsigned RelOff) const override;
> + bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
> + uint64_t BranchAddr, const Symbol &S) const override;
> + bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
> bool usesOnlyLowPageBits(RelType Type) const override;
> void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
> RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
> @@ -66,6 +69,12 @@
> // It doesn't seem to be documented anywhere, but tls on aarch64 uses variant
> // 1 of the tls structures and the tcb size is 16.
> TcbSize = 16;
> + NeedsThunks = true;
> +
> + // See comment in Arch/ARM.cpp for a more detailed explanation of
> + // ThunkSectionSpacing. For AArch64 the only branches we are permitted to
> + // Thunk have a range of +/- 128 MiB
> + ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000;
> }
>
> RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
> @@ -181,6 +190,31 @@
> relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr);
> }
>
> +bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
> + uint64_t BranchAddr, const Symbol &S) const {
> + // ELF for the ARM 64-bit architecture, section Call and Jump relocations
> + // only permits range extension thunks for R_AARCH64_CALL26 and
> + // R_AARCH64_JUMP26 relocation types.
> + if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
> + return false;
> + uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
> + return !inBranchRange(Type, BranchAddr, Dst);
> +}
> +
> +bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
> + if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
> + return true;
> + // The AArch64 call and unconditional branch instructions have a range of
> + // +/- 128 MiB.
> + uint64_t Range = 128 * 1024 * 1024;
> + if (Dst > Src) {
> + // Immediate of branch is signed.
> + Range -= 4;
> + return Dst - Src <= Range;
> + }
> + return Src - Dst <= Range;
> +}
> +
> static void write32AArch64Addr(uint8_t *L, uint64_t Imm) {
> uint32_t ImmLo = (Imm & 0x3) << 29;
> uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
More information about the llvm-commits
mailing list