[lld] [X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation type (PR #116634)
Feng Zou via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 22:28:11 PST 2024
https://github.com/fzou1 updated https://github.com/llvm/llvm-project/pull/116634
>From ea53c432c3953fd08664a210e654893bbe92d8cc Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Wed, 13 Nov 2024 20:02:55 +0800
Subject: [PATCH 1/4] [X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation
type
For the instructions
mov name@GOTTPOFF(%rip), %reg
add name@GOTTPOFF(%rip), %reg
the relocation type
`R_X86_64_CODE_4_GOTTPOFF` = 44
was added in #116633.
The linker can either treat `R_X86_64_CODE_4_GOTTPOFF` as `R_X86_64_GOTTPOFF` or,
when possible, convert the instructions above to
mov $name, %reg
add $name, %reg
if the first byte of the instruction, located at the relocation `offset - 4`,
is `0xd5` (that is, the instruction is encoded with a REX2 prefix).
---
lld/ELF/Arch/X86_64.cpp | 79 ++++++++++++++---------
lld/test/ELF/pack-dyn-relocs-tls-x86-64.s | 6 +-
lld/test/ELF/tls-opt.s | 13 ++++
lld/test/ELF/x86-64-tls-ie-local.s | 28 ++++++--
4 files changed, 87 insertions(+), 39 deletions(-)
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 2dcce5c224d5d6..4fb933e1c7b260 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
case R_X86_64_REX_GOTPCRELX:
case R_X86_64_CODE_4_GOTPCRELX:
case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
return R_GOT_PC;
case R_X86_64_GOTOFF64:
return R_GOTPLTREL;
@@ -547,44 +548,58 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
}
}
-// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
-// R_X86_64_TPOFF32 so that it does not use GOT.
+// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
+// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
uint64_t val) const {
uint8_t *inst = loc - 3;
uint8_t reg = loc[-1] >> 3;
uint8_t *regSlot = loc - 1;
- // Note that ADD with RSP or R12 is converted to ADD instead of LEA
- // because LEA with these registers needs 4 bytes to encode and thus
- // wouldn't fit the space.
-
- if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
- // "addq foo at gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
- memcpy(inst, "\x48\x81\xc4", 3);
- } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
- // "addq foo at gottpoff(%rip),%r12" -> "addq $foo,%r12"
- memcpy(inst, "\x49\x81\xc4", 3);
- } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
- // "addq foo at gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
- memcpy(inst, "\x4d\x8d", 2);
- *regSlot = 0x80 | (reg << 3) | reg;
- } else if (memcmp(inst, "\x48\x03", 2) == 0) {
- // "addq foo at gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
- memcpy(inst, "\x48\x8d", 2);
- *regSlot = 0x80 | (reg << 3) | reg;
- } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
- // "movq foo at gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
- memcpy(inst, "\x49\xc7", 2);
- *regSlot = 0xc0 | reg;
- } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
- // "movq foo at gottpoff(%rip),%reg" -> "movq $foo,%reg"
- memcpy(inst, "\x48\xc7", 2);
- *regSlot = 0xc0 | reg;
+ if (rel.type == R_X86_64_GOTTPOFF) {
+ // Note that ADD with RSP or R12 is converted to ADD instead of LEA
+ // because LEA with these registers needs 4 bytes to encode and thus
+ // wouldn't fit the space.
+
+ if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
+ // "addq foo at gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
+ memcpy(inst, "\x48\x81\xc4", 3);
+ } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
+ // "addq foo at gottpoff(%rip),%r12" -> "addq $foo,%r12"
+ memcpy(inst, "\x49\x81\xc4", 3);
+ } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
+ // "addq foo at gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
+ memcpy(inst, "\x4d\x8d", 2);
+ *regSlot = 0x80 | (reg << 3) | reg;
+ } else if (memcmp(inst, "\x48\x03", 2) == 0) {
+ // "addq foo at gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
+ memcpy(inst, "\x48\x8d", 2);
+ *regSlot = 0x80 | (reg << 3) | reg;
+ } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
+ // "movq foo at gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
+ memcpy(inst, "\x49\xc7", 2);
+ *regSlot = 0xc0 | reg;
+ } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
+ // "movq foo at gottpoff(%rip),%reg" -> "movq $foo,%reg"
+ memcpy(inst, "\x48\xc7", 2);
+ *regSlot = 0xc0 | reg;
+ } else {
+ ErrAlways(ctx)
+ << getErrorLoc(ctx, loc - 3)
+ << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+ }
} else {
- ErrAlways(ctx)
- << getErrorLoc(ctx, loc - 3)
- << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+ assert(rel.type == R_X86_64_CODE_4_GOTTPOFF &&
+ "Unsupported relocation type!");
+ assert((loc[-4] == 0xd5) &&
+ "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!");
+ const uint8_t rex = loc[-3];
+ loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
+ *regSlot = 0xc0 | reg;
+ if (loc[-2] == 0x8b)
+ loc[-2] = 0xc7;
+ else
+ loc[-2] = 0x81;
}
// The original code used a PC relative relocation.
@@ -741,6 +756,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_X86_64_CODE_4_GOTPCRELX:
case R_X86_64_PC32:
case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_PLT32:
case R_X86_64_TLSGD:
case R_X86_64_TLSLD:
@@ -850,6 +866,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
}
break;
case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
relaxTlsIeToLe(loc, rel, val);
} else {
diff --git a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
index b3b1e8a0602772..c6464b4bece097 100644
--- a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
+++ b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
@@ -12,12 +12,16 @@
.globl foo
foo:
movq tlsvar at GOTTPOFF(%rip), %rcx
+ movq tlsvar2 at GOTTPOFF(%rip), %r31
+
.section .tdata,"awT", at progbits
.space 0x1234
tlsvar:
.word 42
-
+tlsvar2:
+ .word 42
// CHECK: Section ({{.+}}) .rela.dyn {
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1234
+// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1236
// CHECK-NEXT: }
diff --git a/lld/test/ELF/tls-opt.s b/lld/test/ELF/tls-opt.s
index ce90ba4f869ce4..818203ee19cb7c 100644
--- a/lld/test/ELF/tls-opt.s
+++ b/lld/test/ELF/tls-opt.s
@@ -20,6 +20,12 @@
// DISASM-NEXT: leaq -4(%r15), %r15
// DISASM-NEXT: addq $-4, %rsp
// DISASM-NEXT: addq $-4, %r12
+// DISASM-NEXT: movq $-8, %r16
+// DISASM-NEXT: movq $-8, %r20
+// DISASM-NEXT: movq $-4, %r16
+// DISASM-NEXT: addq $-8, %r16
+// DISASM-NEXT: addq $-8, %r28
+// DISASM-NEXT: addq $-4, %r16
// LD to LE:
// DISASM-NEXT: movq %fs:0, %rax
@@ -69,6 +75,13 @@ _start:
addq tls1 at GOTTPOFF(%rip), %r15
addq tls1 at GOTTPOFF(%rip), %rsp
addq tls1 at GOTTPOFF(%rip), %r12
+ # EGPR
+ movq tls0 at GOTTPOFF(%rip), %r16
+ movq tls0 at GOTTPOFF(%rip), %r20
+ movq tls1 at GOTTPOFF(%rip), %r16
+ addq tls0 at GOTTPOFF(%rip), %r16
+ addq tls0 at GOTTPOFF(%rip), %r28
+ addq tls1 at GOTTPOFF(%rip), %r16
// LD to LE
leaq tls0 at tlsld(%rip), %rdi
diff --git a/lld/test/ELF/x86-64-tls-ie-local.s b/lld/test/ELF/x86-64-tls-ie-local.s
index c527c86e667713..08547d6b4b5125 100644
--- a/lld/test/ELF/x86-64-tls-ie-local.s
+++ b/lld/test/ELF/x86-64-tls-ie-local.s
@@ -5,24 +5,38 @@
# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s
-# SEC: .got PROGBITS 0000000000002338 000338 000010 00 WA 0 0 8
+# SEC: .got PROGBITS 0000000000002378 000378 000020 00 WA 0 0 8
## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
# REL: .rela.dyn {
-# REL-NEXT: 0x2338 R_X86_64_TPOFF64 - 0x0
-# REL-NEXT: 0x2340 R_X86_64_TPOFF64 - 0x4
+# REL-NEXT: 0x2378 R_X86_64_TPOFF64 - 0x0
+# REL-NEXT: 0x2380 R_X86_64_TPOFF64 - 0x8
+# REL-NEXT: 0x2388 R_X86_64_TPOFF64 - 0x4
+# REL-NEXT: 0x2390 R_X86_64_TPOFF64 - 0xC
# REL-NEXT: }
-## &.got[0] - 0x127f = 0x2338 - 0x127f = 4281
-## &.got[1] - 0x1286 = 0x2340 - 0x1286 = 4282
-# CHECK: 1278: addq 4281(%rip), %rax
-# CHECK-NEXT: 127f: addq 4282(%rip), %rax
+## &.got[0] - 0x12af = 0x2378 - 0x12af = 4297
+## &.got[1] - 0x12b6 = 0x2380 - 0x12b6 = 4298
+## &.got[2] - 0x12be = 0x2388 - 0x12be = 4298
+## &.got[3] - 0x12c6 = 0x2390 - 0x12c6 = 4298
+
+# CHECK: 12a8: addq 4297(%rip), %rax
+# CHECK-NEXT: 12af: addq 4298(%rip), %rax
+# CHECK-NEXT: 12b6: addq 4298(%rip), %r16
+# CHECK-NEXT: 12be: addq 4298(%rip), %r16
addq foo at GOTTPOFF(%rip), %rax
addq bar at GOTTPOFF(%rip), %rax
+addq foo2 at GOTTPOFF(%rip), %r16
+addq bar2 at GOTTPOFF(%rip), %r16
+
.section .tbss,"awT", at nobits
foo:
.long 0
+foo2:
+ .long 0
bar:
.long 0
+bar2:
+ .long 0
>From d1fdd482e1f92709a3bdda9f1c008de5c89f4b1e Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Tue, 19 Nov 2024 16:50:58 +0800
Subject: [PATCH 2/4] Updated test, added comments and removed symbols
unneeded.
---
lld/ELF/Arch/X86_64.cpp | 12 +++++++++--
lld/test/ELF/x86-64-tls-ie-local.s | 32 ++++++++++++------------------
2 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 4fb933e1c7b260..3e3b6af086351f 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -584,7 +584,7 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
memcpy(inst, "\x48\xc7", 2);
*regSlot = 0xc0 | reg;
} else {
- ErrAlways(ctx)
+ Err(ctx)
<< getErrorLoc(ctx, loc - 3)
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
}
@@ -596,10 +596,18 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
const uint8_t rex = loc[-3];
loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
*regSlot = 0xc0 | reg;
+
+ // "movq foo at gottpoff(%rip),%r[16-31]" -> "movq $foo,%r[16-31]"
if (loc[-2] == 0x8b)
loc[-2] = 0xc7;
- else
+ else {
+ // "addq foo at gottpoff(%rip),%r[16-31]" -> "addq $foo,%r[16-31]"
+ if (loc[-2] != 0x03)
+ Err(ctx) << getErrorLoc(ctx, loc - 3)
+ << "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
+ "instructions only";
loc[-2] = 0x81;
+ }
}
// The original code used a PC relative relocation.
diff --git a/lld/test/ELF/x86-64-tls-ie-local.s b/lld/test/ELF/x86-64-tls-ie-local.s
index 08547d6b4b5125..340a654ef9c284 100644
--- a/lld/test/ELF/x86-64-tls-ie-local.s
+++ b/lld/test/ELF/x86-64-tls-ie-local.s
@@ -5,38 +5,32 @@
# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s
-# SEC: .got PROGBITS 0000000000002378 000378 000020 00 WA 0 0 8
+# SEC: .got PROGBITS 0000000000002348 000348 000010 00 WA 0 0 8
## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
# REL: .rela.dyn {
-# REL-NEXT: 0x2378 R_X86_64_TPOFF64 - 0x0
-# REL-NEXT: 0x2380 R_X86_64_TPOFF64 - 0x8
-# REL-NEXT: 0x2388 R_X86_64_TPOFF64 - 0x4
-# REL-NEXT: 0x2390 R_X86_64_TPOFF64 - 0xC
+# REL-NEXT: 0x2348 R_X86_64_TPOFF64 - 0x0
+# REL-NEXT: 0x2350 R_X86_64_TPOFF64 - 0x4
# REL-NEXT: }
-## &.got[0] - 0x12af = 0x2378 - 0x12af = 4297
-## &.got[1] - 0x12b6 = 0x2380 - 0x12b6 = 4298
-## &.got[2] - 0x12be = 0x2388 - 0x12be = 4298
-## &.got[3] - 0x12c6 = 0x2390 - 0x12c6 = 4298
+## &.got[0] - 0x127f = 0x2348 - 0x127f = 4297
+## &.got[1] - 0x1286 = 0x2350 - 0x1286 = 4298
+## &.got[2] - 0x128e = 0x2348 - 0x128e = 4282
+## &.got[3] - 0x1296 = 0x2350 - 0x1296 = 4282
-# CHECK: 12a8: addq 4297(%rip), %rax
-# CHECK-NEXT: 12af: addq 4298(%rip), %rax
-# CHECK-NEXT: 12b6: addq 4298(%rip), %r16
-# CHECK-NEXT: 12be: addq 4298(%rip), %r16
+# CHECK: 1278: addq 4297(%rip), %rax
+# CHECK-NEXT: 127f: addq 4298(%rip), %rax
+# CHECK-NEXT: 1286: addq 4282(%rip), %r16
+# CHECK-NEXT: 128e: addq 4282(%rip), %r16
addq foo at GOTTPOFF(%rip), %rax
addq bar at GOTTPOFF(%rip), %rax
-addq foo2 at GOTTPOFF(%rip), %r16
-addq bar2 at GOTTPOFF(%rip), %r16
+addq foo at GOTTPOFF(%rip), %r16
+addq bar at GOTTPOFF(%rip), %r16
.section .tbss,"awT", at nobits
foo:
.long 0
-foo2:
- .long 0
bar:
.long 0
-bar2:
- .long 0
>From 7b494f4db1ce587bce2f847d4b999500b804f4af Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Sat, 23 Nov 2024 13:56:45 +0800
Subject: [PATCH 3/4] Updated for readability.
---
lld/ELF/Arch/X86_64.cpp | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 3e3b6af086351f..afd84d2344c3ed 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -588,26 +588,27 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
<< getErrorLoc(ctx, loc - 3)
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
}
- } else {
- assert(rel.type == R_X86_64_CODE_4_GOTTPOFF &&
- "Unsupported relocation type!");
- assert((loc[-4] == 0xd5) &&
- "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!");
+ } else if (rel.type == R_X86_64_CODE_4_GOTTPOFF) {
+ if (loc[-4] != 0xd5)
+ Err(ctx) << getErrorLoc(ctx, loc - 4)
+ << "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!";
const uint8_t rex = loc[-3];
loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
*regSlot = 0xc0 | reg;
- // "movq foo at gottpoff(%rip),%r[16-31]" -> "movq $foo,%r[16-31]"
- if (loc[-2] == 0x8b)
+ if (loc[-2] == 0x8b) {
+ // "movq foo at gottpoff(%rip),%r[16-31]" -> "movq $foo,%r[16-31]"
loc[-2] = 0xc7;
- else {
+ } else if (loc[-2] == 0x03) {
// "addq foo at gottpoff(%rip),%r[16-31]" -> "addq $foo,%r[16-31]"
- if (loc[-2] != 0x03)
- Err(ctx) << getErrorLoc(ctx, loc - 3)
- << "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
- "instructions only";
loc[-2] = 0x81;
+ } else {
+ Err(ctx) << getErrorLoc(ctx, loc - 4)
+ << "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
+ "instructions only";
}
+ } else {
+ llvm_unreachable("Unsupported relocation type!");
}
// The original code used a PC relative relocation.
>From 4123725cc7a4810f9aa926230dd63fd6f2dbab3c Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Tue, 26 Nov 2024 14:26:35 +0800
Subject: [PATCH 4/4] Added early return for invalid prefix
---
lld/ELF/Arch/X86_64.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index afd84d2344c3ed..914e1e727981c7 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -589,9 +589,11 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
}
} else if (rel.type == R_X86_64_CODE_4_GOTTPOFF) {
- if (loc[-4] != 0xd5)
+ if (loc[-4] != 0xd5) {
Err(ctx) << getErrorLoc(ctx, loc - 4)
<< "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!";
+ return;
+ }
const uint8_t rex = loc[-3];
loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
*regSlot = 0xc0 | reg;
More information about the llvm-commits
mailing list