[lld] [X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation type (PR #116634)

Feng Zou via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 02:35:25 PST 2024


https://github.com/fzou1 updated https://github.com/llvm/llvm-project/pull/116634

>From ea53c432c3953fd08664a210e654893bbe92d8cc Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Wed, 13 Nov 2024 20:02:55 +0800
Subject: [PATCH 1/2] [X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation
 type

For

  mov name@GOTTPOFF(%rip), %reg
  add name@GOTTPOFF(%rip), %reg

add

  `R_X86_64_CODE_4_GOTTPOFF` = 44

in #116633.

The linker can either treat `R_X86_64_CODE_4_GOTTPOFF` as `R_X86_64_GOTTPOFF`
or convert the instructions above to

  mov $name, %reg
  add $name, %reg

when possible, provided that the first byte of the instruction (the byte at
relocation `offset - 4`) is `0xd5`, i.e. the instruction is encoded with a
REX2 prefix.
---
 lld/ELF/Arch/X86_64.cpp                   | 79 ++++++++++++++---------
 lld/test/ELF/pack-dyn-relocs-tls-x86-64.s |  6 +-
 lld/test/ELF/tls-opt.s                    | 13 ++++
 lld/test/ELF/x86-64-tls-ie-local.s        | 28 ++++++--
 4 files changed, 87 insertions(+), 39 deletions(-)

diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 2dcce5c224d5d6..4fb933e1c7b260 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
   case R_X86_64_REX_GOTPCRELX:
   case R_X86_64_CODE_4_GOTPCRELX:
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_4_GOTTPOFF:
     return R_GOT_PC;
   case R_X86_64_GOTOFF64:
     return R_GOTPLTREL;
@@ -547,44 +548,58 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
   }
 }
 
-// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
-// R_X86_64_TPOFF32 so that it does not use GOT.
+// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
+// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
 void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
   uint8_t *inst = loc - 3;
   uint8_t reg = loc[-1] >> 3;
   uint8_t *regSlot = loc - 1;
 
-  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
-  // because LEA with these registers needs 4 bytes to encode and thus
-  // wouldn't fit the space.
-
-  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
-    // "addq foo at gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
-    memcpy(inst, "\x48\x81\xc4", 3);
-  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
-    // "addq foo at gottpoff(%rip),%r12" -> "addq $foo,%r12"
-    memcpy(inst, "\x49\x81\xc4", 3);
-  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
-    // "addq foo at gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
-    memcpy(inst, "\x4d\x8d", 2);
-    *regSlot = 0x80 | (reg << 3) | reg;
-  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
-    // "addq foo at gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
-    memcpy(inst, "\x48\x8d", 2);
-    *regSlot = 0x80 | (reg << 3) | reg;
-  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
-    // "movq foo at gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
-    memcpy(inst, "\x49\xc7", 2);
-    *regSlot = 0xc0 | reg;
-  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
-    // "movq foo at gottpoff(%rip),%reg" -> "movq $foo,%reg"
-    memcpy(inst, "\x48\xc7", 2);
-    *regSlot = 0xc0 | reg;
+  if (rel.type == R_X86_64_GOTTPOFF) {
+    // Note that ADD with RSP or R12 is converted to ADD instead of LEA
+    // because LEA with these registers needs 4 bytes to encode and thus
+    // wouldn't fit the space.
+
+    if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
+      // "addq foo at gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
+      memcpy(inst, "\x48\x81\xc4", 3);
+    } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
+      // "addq foo at gottpoff(%rip),%r12" -> "addq $foo,%r12"
+      memcpy(inst, "\x49\x81\xc4", 3);
+    } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
+      // "addq foo at gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
+      memcpy(inst, "\x4d\x8d", 2);
+      *regSlot = 0x80 | (reg << 3) | reg;
+    } else if (memcmp(inst, "\x48\x03", 2) == 0) {
+      // "addq foo at gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
+      memcpy(inst, "\x48\x8d", 2);
+      *regSlot = 0x80 | (reg << 3) | reg;
+    } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
+      // "movq foo at gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
+      memcpy(inst, "\x49\xc7", 2);
+      *regSlot = 0xc0 | reg;
+    } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
+      // "movq foo at gottpoff(%rip),%reg" -> "movq $foo,%reg"
+      memcpy(inst, "\x48\xc7", 2);
+      *regSlot = 0xc0 | reg;
+    } else {
+      ErrAlways(ctx)
+          << getErrorLoc(ctx, loc - 3)
+          << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+    }
   } else {
-    ErrAlways(ctx)
-        << getErrorLoc(ctx, loc - 3)
-        << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+    assert(rel.type == R_X86_64_CODE_4_GOTTPOFF &&
+           "Unsupported relocation type!");
+    assert((loc[-4] == 0xd5) &&
+           "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!");
+    const uint8_t rex = loc[-3];
+    loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
+    *regSlot = 0xc0 | reg;
+    if (loc[-2] == 0x8b)
+      loc[-2] = 0xc7;
+    else
+      loc[-2] = 0x81;
   }
 
   // The original code used a PC relative relocation.
@@ -741,6 +756,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_X86_64_CODE_4_GOTPCRELX:
   case R_X86_64_PC32:
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_4_GOTTPOFF:
   case R_X86_64_PLT32:
   case R_X86_64_TLSGD:
   case R_X86_64_TLSLD:
@@ -850,6 +866,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     }
     break;
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_4_GOTTPOFF:
     if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
       relaxTlsIeToLe(loc, rel, val);
     } else {
diff --git a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
index b3b1e8a0602772..c6464b4bece097 100644
--- a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
+++ b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
@@ -12,12 +12,16 @@
     .globl  foo
 foo:
     movq    tlsvar at GOTTPOFF(%rip), %rcx
+    movq    tlsvar2 at GOTTPOFF(%rip), %r31
+
 
     .section    .tdata,"awT", at progbits
     .space 0x1234
 tlsvar:
     .word   42
-
+tlsvar2:
+    .word   42
 // CHECK:          Section ({{.+}}) .rela.dyn {
 // CHECK-NEXT:     R_X86_64_TPOFF64 - 0x1234
+// CHECK-NEXT:     R_X86_64_TPOFF64 - 0x1236
 // CHECK-NEXT:     }
diff --git a/lld/test/ELF/tls-opt.s b/lld/test/ELF/tls-opt.s
index ce90ba4f869ce4..818203ee19cb7c 100644
--- a/lld/test/ELF/tls-opt.s
+++ b/lld/test/ELF/tls-opt.s
@@ -20,6 +20,12 @@
 // DISASM-NEXT:   leaq -4(%r15), %r15
 // DISASM-NEXT:   addq $-4, %rsp
 // DISASM-NEXT:   addq $-4, %r12
+// DISASM-NEXT:   movq $-8, %r16
+// DISASM-NEXT:   movq $-8, %r20
+// DISASM-NEXT:   movq $-4, %r16
+// DISASM-NEXT:   addq $-8, %r16
+// DISASM-NEXT:   addq $-8, %r28
+// DISASM-NEXT:   addq $-4, %r16
 
 // LD to LE:
 // DISASM-NEXT:   movq %fs:0, %rax
@@ -69,6 +75,13 @@ _start:
  addq tls1 at GOTTPOFF(%rip), %r15
  addq tls1 at GOTTPOFF(%rip), %rsp
  addq tls1 at GOTTPOFF(%rip), %r12
+ # EGPR
+ movq tls0 at GOTTPOFF(%rip), %r16
+ movq tls0 at GOTTPOFF(%rip), %r20
+ movq tls1 at GOTTPOFF(%rip), %r16
+ addq tls0 at GOTTPOFF(%rip), %r16
+ addq tls0 at GOTTPOFF(%rip), %r28
+ addq tls1 at GOTTPOFF(%rip), %r16
 
  // LD to LE
  leaq tls0 at tlsld(%rip), %rdi
diff --git a/lld/test/ELF/x86-64-tls-ie-local.s b/lld/test/ELF/x86-64-tls-ie-local.s
index c527c86e667713..08547d6b4b5125 100644
--- a/lld/test/ELF/x86-64-tls-ie-local.s
+++ b/lld/test/ELF/x86-64-tls-ie-local.s
@@ -5,24 +5,38 @@
 # RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s
 
-# SEC: .got PROGBITS 0000000000002338 000338 000010 00 WA 0 0 8
+# SEC: .got PROGBITS 0000000000002378 000378 000020 00 WA 0 0 8
 
 ## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
 # REL:      .rela.dyn {
-# REL-NEXT:   0x2338 R_X86_64_TPOFF64 - 0x0
-# REL-NEXT:   0x2340 R_X86_64_TPOFF64 - 0x4
+# REL-NEXT:   0x2378 R_X86_64_TPOFF64 - 0x0
+# REL-NEXT:   0x2380 R_X86_64_TPOFF64 - 0x8
+# REL-NEXT:   0x2388 R_X86_64_TPOFF64 - 0x4
+# REL-NEXT:   0x2390 R_X86_64_TPOFF64 - 0xC
 # REL-NEXT: }
 
-## &.got[0] - 0x127f = 0x2338 - 0x127f = 4281
-## &.got[1] - 0x1286 = 0x2340 - 0x1286 = 4282
-# CHECK:      1278:       addq 4281(%rip), %rax
-# CHECK-NEXT: 127f:       addq 4282(%rip), %rax
+## &.got[0] - 0x12af = 0x2378 - 0x12af = 4297
+## &.got[1] - 0x12b6 = 0x2380 - 0x12b6 = 4298
+## &.got[2] - 0x12be = 0x2388 - 0x12be = 4298
+## &.got[3] - 0x12c6 = 0x2390 - 0x12c6 = 4298
+
+# CHECK:      12a8:       addq 4297(%rip), %rax
+# CHECK-NEXT: 12af:       addq 4298(%rip), %rax
+# CHECK-NEXT: 12b6:       addq 4298(%rip), %r16
+# CHECK-NEXT: 12be:       addq 4298(%rip), %r16
 
 addq foo at GOTTPOFF(%rip), %rax
 addq bar at GOTTPOFF(%rip), %rax
+addq foo2 at GOTTPOFF(%rip), %r16
+addq bar2 at GOTTPOFF(%rip), %r16
+
 
 .section .tbss,"awT", at nobits
 foo:
   .long 0
+foo2:
+  .long 0
 bar:
   .long 0
+bar2:
+  .long 0

>From d1fdd482e1f92709a3bdda9f1c008de5c89f4b1e Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Tue, 19 Nov 2024 16:50:58 +0800
Subject: [PATCH 2/2] Updated test, added comments and removed unneeded
 symbols.

---
 lld/ELF/Arch/X86_64.cpp            | 12 +++++++++--
 lld/test/ELF/x86-64-tls-ie-local.s | 32 ++++++++++++------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 4fb933e1c7b260..3e3b6af086351f 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -584,7 +584,7 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
       memcpy(inst, "\x48\xc7", 2);
       *regSlot = 0xc0 | reg;
     } else {
-      ErrAlways(ctx)
+      Err(ctx)
           << getErrorLoc(ctx, loc - 3)
           << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
     }
@@ -596,10 +596,18 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
     const uint8_t rex = loc[-3];
     loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
     *regSlot = 0xc0 | reg;
+
+    // "movq foo at gottpoff(%rip),%r[16-31]" -> "movq $foo,%r[16-31]"
     if (loc[-2] == 0x8b)
       loc[-2] = 0xc7;
-    else
+    else {
+      // "addq foo at gottpoff(%rip),%r[16-31]" -> "addq $foo,%r[16-31]"
+      if (loc[-2] != 0x03)
+        Err(ctx) << getErrorLoc(ctx, loc - 3)
+                 << "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
+                    "instructions only";
       loc[-2] = 0x81;
+    }
   }
 
   // The original code used a PC relative relocation.
diff --git a/lld/test/ELF/x86-64-tls-ie-local.s b/lld/test/ELF/x86-64-tls-ie-local.s
index 08547d6b4b5125..340a654ef9c284 100644
--- a/lld/test/ELF/x86-64-tls-ie-local.s
+++ b/lld/test/ELF/x86-64-tls-ie-local.s
@@ -5,38 +5,32 @@
 # RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s
 
-# SEC: .got PROGBITS 0000000000002378 000378 000020 00 WA 0 0 8
+# SEC: .got PROGBITS 0000000000002348 000348 000010 00 WA 0 0 8
 
 ## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
 # REL:      .rela.dyn {
-# REL-NEXT:   0x2378 R_X86_64_TPOFF64 - 0x0
-# REL-NEXT:   0x2380 R_X86_64_TPOFF64 - 0x8
-# REL-NEXT:   0x2388 R_X86_64_TPOFF64 - 0x4
-# REL-NEXT:   0x2390 R_X86_64_TPOFF64 - 0xC
+# REL-NEXT:   0x2348 R_X86_64_TPOFF64 - 0x0
+# REL-NEXT:   0x2350 R_X86_64_TPOFF64 - 0x4
 # REL-NEXT: }
 
-## &.got[0] - 0x12af = 0x2378 - 0x12af = 4297
-## &.got[1] - 0x12b6 = 0x2380 - 0x12b6 = 4298
-## &.got[2] - 0x12be = 0x2388 - 0x12be = 4298
-## &.got[3] - 0x12c6 = 0x2390 - 0x12c6 = 4298
+## &.got[0] - 0x127f = 0x2348 - 0x127f = 4297
+## &.got[1] - 0x1286 = 0x2350 - 0x1286 = 4298
+## &.got[0] - 0x128e = 0x2348 - 0x128e = 4282
+## &.got[1] - 0x1296 = 0x2350 - 0x1296 = 4282
 
-# CHECK:      12a8:       addq 4297(%rip), %rax
-# CHECK-NEXT: 12af:       addq 4298(%rip), %rax
-# CHECK-NEXT: 12b6:       addq 4298(%rip), %r16
-# CHECK-NEXT: 12be:       addq 4298(%rip), %r16
+# CHECK:      1278:       addq 4297(%rip), %rax
+# CHECK-NEXT: 127f:       addq 4298(%rip), %rax
+# CHECK-NEXT: 1286:       addq 4282(%rip), %r16
+# CHECK-NEXT: 128e:       addq 4282(%rip), %r16
 
 addq foo at GOTTPOFF(%rip), %rax
 addq bar at GOTTPOFF(%rip), %rax
-addq foo2 at GOTTPOFF(%rip), %r16
-addq bar2 at GOTTPOFF(%rip), %r16
+addq foo at GOTTPOFF(%rip), %r16
+addq bar at GOTTPOFF(%rip), %r16
 
 
 .section .tbss,"awT", at nobits
 foo:
   .long 0
-foo2:
-  .long 0
 bar:
   .long 0
-bar2:
-  .long 0



More information about the llvm-commits mailing list