[lld] [X86][LLD] Handle R_X86_64_CODE_6_GOTTPOFF relocation type (PR #117675)

Feng Zou via llvm-commits llvm-commits@lists.llvm.org
Mon Nov 25 22:34:35 PST 2024


https://github.com/fzou1 created https://github.com/llvm/llvm-project/pull/117675

For the following instructions,

    add %reg1, name@GOTTPOFF(%rip), %reg2
    add name@GOTTPOFF(%rip), %reg1, %reg2
    {nf} add %reg1, name@GOTTPOFF(%rip), %reg2
    {nf} add name@GOTTPOFF(%rip), %reg1, %reg2
    {nf} add name@GOTTPOFF(%rip), %reg

the relocation type

    R_X86_64_CODE_6_GOTTPOFF = 50

was added in #117277.

The linker can treat R_X86_64_CODE_6_GOTTPOFF the same as R_X86_64_GOTTPOFF, or convert the instructions above to

    add $name@tpoff, %reg1, %reg2
    add $name@tpoff, %reg1, %reg2
    {nf} add $name@tpoff, %reg1, %reg2
    {nf} add $name@tpoff, %reg1, %reg2
    {nf} add $name@tpoff, %reg

if the first byte of the instruction (the byte at the relocation offset - 6) is 0x62, i.e. the instruction is encoded with an EVEX prefix, when possible.
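
As a rough illustration, the sketch below condenses the EVEX rewrite logic from the patch (the helper name is invented and error reporting is omitted; the authoritative code is in relaxTlsIeToLe in the diff below):

    #include <cstdint>

    // Sketch: relax an EVEX-encoded "add name@gottpoff(%rip), ..." to
    // "add $name@tpoff, ...". loc points at the 32-bit field covered by
    // the relocation, so the instruction starts at loc - 6.
    static bool relaxEvexGotTpOff(uint8_t *loc) {
      if (loc[-6] != 0x62)                    // not an EVEX escape byte
        return false;
      if (loc[-2] != 0x01 && loc[-2] != 0x03) // only the two ADD opcodes
        return false;
      loc[-2] = 0x81;                         // ADD r/m64, imm32
      // The register that lived in ModRM.reg becomes ModRM.rm once the
      // memory operand turns into an immediate, so the EVEX R/R' extension
      // bits (stored inverted) have to move into the B/B4 positions too.
      if ((loc[-5] & 0x80) == 0)              // EVEX.R was in use
        loc[-5] = (loc[-5] | 0x80) & ~0x20;
      if ((loc[-5] & 0x10) == 0)              // EVEX.R' was in use
        loc[-5] |= 0x10 | 0x08;
      loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7);  // ModRM: mod=11, rm=old reg
      return true;
    }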

Binutils patch: bminor/binutils-gdb@5bc71c2
Binutils mail thread: https://sourceware.org/pipermail/binutils/2024-February/132351.html
ABI discussion: https://groups.google.com/g/x86-64-abi/c/FhEZjCtDLFw/m/VHDjN4orAgAJ
Blog: https://kanrobert.github.io/rfc/All-about-APX-relocation
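
For reference, the offsets used throughout the patch assume this layout for the EVEX-encoded ADD, with loc denoting the address the relocation is applied to:

    loc - 6   0x62     EVEX escape byte
    loc - 5   P0       payload byte 0: R/X/B register-extension bits plus map id
    loc - 4   P1       payload byte 1: W, inverted vvvv, pp
    loc - 3   P2       payload byte 2: APX state such as the ND/NF bits
    loc - 2   opcode   0x01 or 0x03 for the ADD forms above
    loc - 1   ModRM    mod=00, rm=101 selects RIP-relative addressing
    loc + 0   disp32   four bytes patched by R_X86_64_CODE_6_GOTTPOFF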

From 1d4f291949d9d1040be6359eade5b16005fc2c18 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou@intel.com>
Date: Tue, 26 Nov 2024 14:21:11 +0800
Subject: [PATCH] [X86][LLD] Handle R_X86_64_CODE_6_GOTTPOFF relocation type

For the following instructions,

    add %reg1, name@GOTTPOFF(%rip), %reg2
    add name@GOTTPOFF(%rip), %reg1, %reg2
    {nf} add %reg1, name@GOTTPOFF(%rip), %reg2
    {nf} add name@GOTTPOFF(%rip), %reg1, %reg2
    {nf} add name@GOTTPOFF(%rip), %reg

the relocation type

    R_X86_64_CODE_6_GOTTPOFF = 50

was added in #117277.

The linker can treat R_X86_64_CODE_6_GOTTPOFF the same as R_X86_64_GOTTPOFF, or
convert the instructions above to

    add $name@tpoff, %reg1, %reg2
    add $name@tpoff, %reg1, %reg2
    {nf} add $name@tpoff, %reg1, %reg2
    {nf} add $name@tpoff, %reg1, %reg2
    {nf} add $name@tpoff, %reg

if the first byte of the instruction (the byte at the relocation offset - 6) is
0x62, i.e. the instruction is encoded with an EVEX prefix, when possible.

Binutils patch: bminor/binutils-gdb@5bc71c2
Binutils mail thread: https://sourceware.org/pipermail/binutils/2024-February/132351.html
ABI discussion: https://groups.google.com/g/x86-64-abi/c/FhEZjCtDLFw/m/VHDjN4orAgAJ
Blog: https://kanrobert.github.io/rfc/All-about-APX-relocation
---
 lld/ELF/Arch/X86_64.cpp | 89 +++++++++++++++++++++++++++--------------
 lld/test/ELF/tls-opt.s  | 17 ++++++++
 2 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 2dcce5c224d5d6..749ab7ca7db7e7 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
   case R_X86_64_REX_GOTPCRELX:
   case R_X86_64_CODE_4_GOTPCRELX:
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_6_GOTTPOFF:
     return R_GOT_PC;
   case R_X86_64_GOTOFF64:
     return R_GOTPLTREL;
@@ -547,44 +548,68 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
   }
 }
 
-// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
-// R_X86_64_TPOFF32 so that it does not use GOT.
+// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_6_GOTTPOFF relocation can
+// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
 void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
   uint8_t *inst = loc - 3;
   uint8_t reg = loc[-1] >> 3;
   uint8_t *regSlot = loc - 1;
 
-  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
-  // because LEA with these registers needs 4 bytes to encode and thus
-  // wouldn't fit the space.
-
-  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
-    // "addq foo at gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
-    memcpy(inst, "\x48\x81\xc4", 3);
-  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
-    // "addq foo at gottpoff(%rip),%r12" -> "addq $foo,%r12"
-    memcpy(inst, "\x49\x81\xc4", 3);
-  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
-    // "addq foo at gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
-    memcpy(inst, "\x4d\x8d", 2);
-    *regSlot = 0x80 | (reg << 3) | reg;
-  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
-    // "addq foo at gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
-    memcpy(inst, "\x48\x8d", 2);
-    *regSlot = 0x80 | (reg << 3) | reg;
-  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
-    // "movq foo at gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
-    memcpy(inst, "\x49\xc7", 2);
-    *regSlot = 0xc0 | reg;
-  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
-    // "movq foo at gottpoff(%rip),%reg" -> "movq $foo,%reg"
-    memcpy(inst, "\x48\xc7", 2);
-    *regSlot = 0xc0 | reg;
+  if (rel.type == R_X86_64_GOTTPOFF) {
+    // Note that ADD with RSP or R12 is converted to ADD instead of LEA
+    // because LEA with these registers needs 4 bytes to encode and thus
+    // wouldn't fit the space.
+
+    if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
+      // "addq foo at gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
+      memcpy(inst, "\x48\x81\xc4", 3);
+    } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
+      // "addq foo at gottpoff(%rip),%r12" -> "addq $foo,%r12"
+      memcpy(inst, "\x49\x81\xc4", 3);
+    } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
+      // "addq foo at gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
+      memcpy(inst, "\x4d\x8d", 2);
+      *regSlot = 0x80 | (reg << 3) | reg;
+    } else if (memcmp(inst, "\x48\x03", 2) == 0) {
+      // "addq foo at gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
+      memcpy(inst, "\x48\x8d", 2);
+      *regSlot = 0x80 | (reg << 3) | reg;
+    } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
+      // "movq foo at gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
+      memcpy(inst, "\x49\xc7", 2);
+      *regSlot = 0xc0 | reg;
+    } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
+      // "movq foo at gottpoff(%rip),%reg" -> "movq $foo,%reg"
+      memcpy(inst, "\x48\xc7", 2);
+      *regSlot = 0xc0 | reg;
+    } else {
+      ErrAlways(ctx)
+          << getErrorLoc(ctx, loc - 3)
+          << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+    }
+  } else if (rel.type == R_X86_64_CODE_6_GOTTPOFF) {
+    if (loc[-6] != 0x62) {
+      Err(ctx) << getErrorLoc(ctx, loc - 6)
+               << "Invalid prefix with R_X86_64_CODE_6_GOTTPOFF!";
+      return;
+    }
+    if (loc[-2] == 0x3 || loc[-2] == 0x1) {
+      // "addq foo at gottpoff(%rip), %reg1, %reg2" -> "addq $foo, %reg1, %reg2"
+      loc[-2] = 0x81;
+      // Move R bits to B bits in EVEX payloads and ModRM byte.
+      if ((loc[-5] & (1 << 7)) == 0)
+        loc[-5] = (loc[-5] | (1 << 7)) & ~(1 << 5);
+      if ((loc[-5] & (1 << 4)) == 0)
+        loc[-5] = loc[-5] | (1 << 4) | (1 << 3);
+      *regSlot = 0xc0 | reg;
+    } else {
+      Err(ctx)
+          << getErrorLoc(ctx, loc - 6)
+          << "R_X86_64_CODE_6_GOTTPOFF must be used in ADDQ instructions only";
+    }
   } else {
-    ErrAlways(ctx)
-        << getErrorLoc(ctx, loc - 3)
-        << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+    llvm_unreachable("Unsupported relocation type!");
   }
 
   // The original code used a PC relative relocation.
@@ -741,6 +766,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_X86_64_CODE_4_GOTPCRELX:
   case R_X86_64_PC32:
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_6_GOTTPOFF:
   case R_X86_64_PLT32:
   case R_X86_64_TLSGD:
   case R_X86_64_TLSLD:
@@ -850,6 +876,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     }
     break;
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_6_GOTTPOFF:
     if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
       relaxTlsIeToLe(loc, rel, val);
     } else {
diff --git a/lld/test/ELF/tls-opt.s b/lld/test/ELF/tls-opt.s
index ce90ba4f869ce4..784f575b69ede5 100644
--- a/lld/test/ELF/tls-opt.s
+++ b/lld/test/ELF/tls-opt.s
@@ -20,6 +20,14 @@
 // DISASM-NEXT:   leaq -4(%r15), %r15
 // DISASM-NEXT:   addq $-4, %rsp
 // DISASM-NEXT:   addq $-4, %r12
+// DISASM-NEXT:   addq $-10, %r16, %r16
+// DISASM-NEXT:   addq $-10, %r16, %r20
+// DISASM-NEXT:   addq $-10, %r16, %rax
+// DISASM-NEXT:   addq $-10, %rax, %r16
+// DISASM-NEXT:   addq $-10, %r8, %r16
+// DISASM-NEXT:   addq $-10, %rax, %r12
+// DISASM-NEXT:   {nf} addq $-10, %r8, %r16
+// DISASM-NEXT:   {nf} addq $-10, %rax, %r12
 
 // LD to LE:
 // DISASM-NEXT:   movq %fs:0, %rax
@@ -69,6 +77,15 @@ _start:
 addq tls1@GOTTPOFF(%rip), %r15
 addq tls1@GOTTPOFF(%rip), %rsp
 addq tls1@GOTTPOFF(%rip), %r12
+# NDD
+ addq tls0@GOTTPOFF(%rip), %r16, %r16
+ addq tls0@GOTTPOFF(%rip), %r16, %r20
+ addq tls0@GOTTPOFF(%rip), %r16, %rax
+ addq tls0@GOTTPOFF(%rip), %rax, %r16
+ addq %r8, tls0@GOTTPOFF(%rip), %r16
+ addq tls0@GOTTPOFF(%rip), %rax, %r12
+ {nf} addq %r8, tls0@GOTTPOFF(%rip), %r16
+ {nf} addq tls0@GOTTPOFF(%rip), %rax, %r12
 
  // LD to LE
 leaq tls0@tlsld(%rip), %rdi

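To see the relaxation in action locally, something along these lines (the exact commands and flags are illustrative; the test above drives the same tools via lit) disassembles the rewritten instructions:

    # Assemble a file containing the NDD/NF adds above, link it into an
    # executable so lld applies the IE->LE relaxation, then disassemble.
    llvm-mc -filetype=obj -triple=x86_64 tls-opt.s -o tls-opt.o
    ld.lld tls-opt.o -o tls-opt
    llvm-objdump -d --no-show-raw-insn tls-opt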

