[lld] 38ed1db - [ELF] Support non-RAX/non-adjacent R_X86_64_GOTPC32_TLSDESC/R_X86_64_TLSDESC_CALL
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 23 10:30:19 PST 2021
Author: Fangrui Song
Date: 2021-11-23T10:30:11-08:00
New Revision: 38ed1db7e8740fce236d1893ff9e20cc22ef0ada
URL: https://github.com/llvm/llvm-project/commit/38ed1db7e8740fce236d1893ff9e20cc22ef0ada
DIFF: https://github.com/llvm/llvm-project/commit/38ed1db7e8740fce236d1893ff9e20cc22ef0ada.diff
LOG: [ELF] Support non-RAX/non-adjacent R_X86_64_GOTPC32_TLSDESC/R_X86_64_TLSDESC_CALL
The current TLSDESC optimization code assumes:
```
leaq x@tlsdesc(%rip), %rax
call *x@tlscall(%rax) # adjacent
```
From https://gitlab.freedesktop.org/mesa/mesa/-/issues/5665 , it seems that the
two instructions may not be adjacent in GCC 10's output:
```
leaq x@tlsdesc(%rip), %rax
something else
call *x@tlscall(%rax)
```
This patch supports the case. While here, support non-RAX registers for
R_X86_64_GOTPC32_TLSDESC, in case the compiler generates inefficient:
```
leaq x@tlsdesc(%rip), %rcx # or %rdx, %rbx, %rdi, ...
movq %rcx, %rax
call *x@tlscall(%rax) # GNU ld/gold error for non-RAX
```
Differential Revision: https://reviews.llvm.org/D114416
Added:
Modified:
lld/ELF/Arch/X86_64.cpp
lld/docs/ReleaseNotes.rst
lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
lld/test/ELF/x86-64-tlsdesc-gd.s
Removed:
################################################################################
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 40436752399b4..1c399e5d1b265 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -99,7 +99,11 @@ X86_64::X86_64() {
defaultImageBase = 0x200000;
}
-int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
+int X86_64::getTlsGdRelaxSkip(RelType type) const {
+ // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
+ return type == R_X86_64_GOTPC32_TLSDESC || type == R_X86_64_TLSDESC_CALL ? 1
+ : 2;
+}
// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
@@ -443,24 +447,24 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
// The original code used a pc relative relocation and so we have to
// compensate for the -4 in had in the addend.
write32le(loc + 8, val + 4);
- } else {
- // Convert
- // lea x at tlsgd(%rip), %rax
- // call *(%rax)
- // to the following two instructions.
- assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
- if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
- error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
- "in callq *x at tlsdesc(%rip), %rax");
+ } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
+ // Convert leaq x at tlsdesc(%rip), %REG to movq $x at tpoff, %REG.
+ if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
+ (loc[-1] & 0xc7) != 0x05) {
+ errorOrWarn(getErrorLocation(loc - 3) +
+ "R_X86_64_GOTPC32_TLSDESC must be used "
+ "in leaq x at tlsdesc(%rip), %REG");
return;
}
- // movq $x at tpoff(%rip),%rax
+ loc[-3] = 0x48 | ((loc[-3] >> 2) & 1);
loc[-2] = 0xc7;
- loc[-1] = 0xc0;
+ loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7);
write32le(loc, val + 4);
- // xchg ax,ax
- loc[4] = 0x66;
- loc[5] = 0x90;
+ } else {
+ // Convert call *x at tlsdesc(%REG) to xchg ax, ax.
+ assert(rel.type == R_X86_64_TLSDESC_CALL);
+ loc[0] = 0x66;
+ loc[1] = 0x90;
}
}
@@ -484,23 +488,23 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
// Both code sequences are PC relatives, but since we are moving the
// constant forward by 8 bytes we have to subtract the value by 8.
write32le(loc + 8, val - 8);
- } else {
- // Convert
- // lea x at tlsgd(%rip), %rax
- // call *(%rax)
- // to the following two instructions.
+ } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
+ // Convert leaq x at tlsdesc(%rip), %REG to movq x at gottpoff(%rip), %REG.
assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
- if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
- error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
- "in callq *x at tlsdesc(%rip), %rax");
+ if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
+ (loc[-1] & 0xc7) != 0x05) {
+ errorOrWarn(getErrorLocation(loc - 3) +
+ "R_X86_64_GOTPC32_TLSDESC must be used "
+ "in leaq x at tlsdesc(%rip), %REG");
return;
}
- // movq x at gottpoff(%rip),%rax
loc[-2] = 0x8b;
write32le(loc, val);
- // xchg ax,ax
- loc[4] = 0x66;
- loc[5] = 0x90;
+ } else {
+ // Convert call *x at tlsdesc(%rax) to xchg ax, ax.
+ assert(rel.type == R_X86_64_TLSDESC_CALL);
+ loc[0] = 0x66;
+ loc[1] = 0x90;
}
}
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 1d53177200c30..90245996b1cf0 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -36,6 +36,9 @@ Architecture specific changes:
* The x86-32 port now supports TLSDESC (``-mtls-dialect=gnu2``).
(`D112582 <https://reviews.llvm.org/D112582>`_)
+* The x86-64 port now handles non-RAX/non-adjacent ``R_X86_64_GOTPC32_TLSDESC``
+ and ``R_X86_64_TLSDESC_CALL`` (``-mtls-dialect=gnu2``).
+ (`D114416 <https://reviews.llvm.org/D114416>`_)
* For x86-64, ``--no-relax`` now suppresses ``R_X86_64_GOTPCRELX`` and
``R_X86_64_REX_GOTPCRELX`` GOT optimization
(`D113615 <https://reviews.llvm.org/D113615>`_)
diff --git a/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s b/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
index bd75ff2bc8f3e..a04087bb096a5 100644
--- a/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
+++ b/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
@@ -8,8 +8,8 @@
## GD to IE relaxation.
# RUN: not ld.lld %t.o %t1.so -o /dev/null 2>&1 | FileCheck -DINPUT=%t.o %s
-# CHECK: error: [[INPUT]]:(.text+0x0): R_X86_64_GOTPC32_TLSDESC must be used in callq *x at tlsdesc(%rip), %rax
+# CHECK: error: [[INPUT]]:(.text+0x0): R_X86_64_GOTPC32_TLSDESC must be used in leaq x at tlsdesc(%rip), %REG
-leaq a at tlsdesc(%rip), %rdx
+leaq a at tlsdesc(%rbx), %rdx
call *a at tlscall(%rdx)
movl %fs:(%rax), %eax
diff --git a/lld/test/ELF/x86-64-tlsdesc-gd.s b/lld/test/ELF/x86-64-tlsdesc-gd.s
index 626bad04d938f..f0cdf08040f39 100644
--- a/lld/test/ELF/x86-64-tlsdesc-gd.s
+++ b/lld/test/ELF/x86-64-tlsdesc-gd.s
@@ -19,38 +19,40 @@
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s
# GD-RELA: .rela.dyn {
-# GD-RELA-NEXT: 0x23B8 R_X86_64_TLSDESC - 0xB
-# GD-RELA-NEXT: 0x23A8 R_X86_64_TLSDESC a 0x0
-# GD-RELA-NEXT: 0x23C8 R_X86_64_TLSDESC c 0x0
+# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC - 0xB
+# GD-RELA-NEXT: 0x23B0 R_X86_64_TLSDESC a 0x0
+# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC c 0x0
# GD-RELA-NEXT: }
# GD-RELA: Hex dump of section '.got':
-# GD-RELA-NEXT: 0x000023a8 00000000 00000000 00000000 00000000
-# GD-RELA-NEXT: 0x000023b8 00000000 00000000 00000000 00000000
-# GD-RELA-NEXT: 0x000023c8 00000000 00000000 00000000 00000000
+# GD-RELA-NEXT: 0x000023b0 00000000 00000000 00000000 00000000
+# GD-RELA-NEXT: 0x000023c0 00000000 00000000 00000000 00000000
+# GD-RELA-NEXT: 0x000023d0 00000000 00000000 00000000 00000000
# GD-REL: .rel.dyn {
-# GD-REL-NEXT: 0x23A0 R_X86_64_TLSDESC -
-# GD-REL-NEXT: 0x2390 R_X86_64_TLSDESC a
-# GD-REL-NEXT: 0x23B0 R_X86_64_TLSDESC c
+# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC -
+# GD-REL-NEXT: 0x2398 R_X86_64_TLSDESC a
+# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC c
# GD-REL-NEXT: }
# GD-REL: Hex dump of section '.got':
-# GD-REL-NEXT: 0x00002390 00000000 00000000 00000000 00000000
-# GD-REL-NEXT: 0x000023a0 00000000 00000000 0b000000 00000000
-# GD-REL-NEXT: 0x000023b0 00000000 00000000 00000000 00000000
+# GD-REL-NEXT: 0x00002398 00000000 00000000 00000000 00000000
+# GD-REL-NEXT: 0x000023a8 00000000 00000000 0b000000 00000000
+# GD-REL-NEXT: 0x000023b8 00000000 00000000 00000000 00000000
-## &.rela.dyn[a]-pc = 0x23A8-0x12e7 = 4289
-# GD: leaq 4289(%rip), %rax
+## &.rela.dyn[a]-pc = 0x23B0-0x12e7 = 4297
+# GD: leaq 4297(%rip), %rax
# GD-NEXT: 12e7: callq *(%rax)
# GD-NEXT: movl %fs:(%rax), %eax
-## &.rela.dyn[b]-pc = 0x23B8-0x12f3 = 4293
-# GD-NEXT: leaq 4293(%rip), %rax
-# GD-NEXT: 12f3: callq *(%rax)
+## &.rela.dyn[b]-pc = 0x23C0-0x12f3 = 4301
+# GD-NEXT: leaq 4301(%rip), %rcx
+# GD-NEXT: 12f3: movq %rcx, %rax
+# GD-NEXT: callq *(%rax)
# GD-NEXT: movl %fs:(%rax), %eax
-## &.rela.dyn[c]-pc = 0x23C8-0x12f3 = 4297
-# GD-NEXT: leaq 4297(%rip), %rax
-# GD-NEXT: 12ff: callq *(%rax)
+## &.rela.dyn[c]-pc = 0x23D0-0x1302 = 4302
+# GD-NEXT: leaq 4302(%rip), %r15
+# GD-NEXT: 1302: movq %r15, %rax
+# GD-NEXT: callq *(%rax)
# GD-NEXT: movl %fs:(%rax), %eax
# NOREL: no relocations
@@ -60,39 +62,47 @@
# LE-NEXT: nop
# LE-NEXT: movl %fs:(%rax), %eax
## tpoff(b) = st_value(b) - tls_size = -5
-# LE: movq $-5, %rax
+# LE: movq $-5, %rcx
+# LE-NEXT: movq %rcx, %rax
# LE-NEXT: nop
# LE-NEXT: movl %fs:(%rax), %eax
## tpoff(c) = st_value(c) - tls_size = -4
-# LE: movq $-4, %rax
+# LE: movq $-4, %r15
+# LE-NEXT: movq %r15, %rax
# LE-NEXT: nop
# LE-NEXT: movl %fs:(%rax), %eax
# IE-REL: .rela.dyn {
-# IE-REL-NEXT: 0x202370 R_X86_64_TPOFF64 c 0x0
+# IE-REL-NEXT: 0x202378 R_X86_64_TPOFF64 c 0x0
# IE-REL-NEXT: }
## a is relaxed to use LE.
# IE: movq $-4, %rax
# IE-NEXT: nop
# IE-NEXT: movl %fs:(%rax), %eax
-# IE-NEXT: movq $-1, %rax
+# IE-NEXT: movq $-1, %rcx
+# IE-NEXT: movq %rcx, %rax
# IE-NEXT: nop
# IE-NEXT: movl %fs:(%rax), %eax
-## &.rela.dyn[c]-pc = 0x202370 - 0x2012a7 = 4297
-# IE-NEXT: movq 4297(%rip), %rax
-# IE-NEXT: 2012a7: nop
+## &.rela.dyn[c]-pc = 0x202378 - 0x2012aa = 4302
+# IE-NEXT: movq 4302(%rip), %r15
+# IE-NEXT: 2012aa: movq %r15, %rax
+# IE-NEXT: nop
# IE-NEXT: movl %fs:(%rax), %eax
leaq a at tlsdesc(%rip), %rax
call *a at tlscall(%rax)
movl %fs:(%rax), %eax
-leaq b at tlsdesc(%rip), %rax
+## leaq/call may not be adjacent: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5665
+## Test non-RAX registers as well.
+leaq b at tlsdesc(%rip), %rcx
+movq %rcx, %rax
call *b at tlscall(%rax)
movl %fs:(%rax), %eax
-leaq c at tlsdesc(%rip), %rax
+leaq c at tlsdesc(%rip), %r15
+movq %r15, %rax
call *c at tlscall(%rax)
movl %fs:(%rax), %eax
More information about the llvm-commits
mailing list