[lld] r255103 - [ELF] - Implement the TLS relocation optimization for 32-bit x86.

George Rimar via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 9 01:55:54 PST 2015


Author: grimar
Date: Wed Dec  9 03:55:54 2015
New Revision: 255103

URL: http://llvm.org/viewvc/llvm-project?rev=255103&view=rev
Log:
[ELF] - Implement the TLS relocation optimization for 32-bit x86.

Implement the TLS relocation optimization for 32-bit x86 that is described in
"ELF Handling For Thread-Local Storage" by Ulrich Drepper, chapter 5,
"IA-32 Linker Optimizations". Specifically, this patch implements these
optimizations: LD->LE, GD->IE, GD->LE, and IE->LE.

Differential revision: http://reviews.llvm.org/D15292

Added:
    lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s
    lld/trunk/test/ELF/tls-opt-gdiele-i686.s
    lld/trunk/test/ELF/tls-opt-i686.s
Modified:
    lld/trunk/ELF/Target.cpp

Modified: lld/trunk/ELF/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.cpp?rev=255103&r1=255102&r2=255103&view=diff
==============================================================================
--- lld/trunk/ELF/Target.cpp (original)
+++ lld/trunk/ELF/Target.cpp Wed Dec  9 03:55:54 2015
@@ -88,6 +88,20 @@ public:
   bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override;
   void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
                    uint64_t SA, uint8_t *PairedLoc = nullptr) const override;
+  bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
+  unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
+                               uint64_t P, uint64_t SA,
+                               const SymbolBody &S) const override;
+
+private:
+  void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
+  void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
+  void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
+  void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
 };
 
 class X86_64TargetInfo final : public TargetInfo {
@@ -260,7 +274,7 @@ bool X86TargetInfo::isTlsDynReloc(unsign
   if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 ||
       Type == R_386_TLS_GOTIE)
     return Config->Shared;
-  return false;
+  return Type == R_386_TLS_GD;
 }
 
 void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
@@ -311,8 +325,11 @@ bool X86TargetInfo::relocNeedsCopy(uint3
 }
 
 bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
-  return Type == R_386_TLS_GOTIE || Type == R_386_GOT32 ||
-         relocNeedsPlt(Type, S);
+  if (S.isTLS() && Type == R_386_TLS_GD)
+    return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true);
+  if (Type == R_386_TLS_GOTIE)
+    return !isTlsOptimized(Type, &S);
+  return Type == R_386_GOT32 || relocNeedsPlt(Type, S);
 }
 
 bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const {
@@ -358,6 +375,121 @@ void X86TargetInfo::relocateOne(uint8_t
   }
 }
 
+bool X86TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
+  if (Config->Shared || (S && !S->isTLS())) // never relax if Shared or S is non-TLS
+    return false;
+  return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM ||
+         Type == R_386_TLS_GD || // GOTIE additionally requires a non-preemptible S
+         (Type == R_386_TLS_GOTIE && !canBePreempted(S, true)); // NOTE(review): S may be null here
+}
+
+unsigned X86TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+                                            uint32_t Type, uint64_t P,
+                                            uint64_t SA,
+                                            const SymbolBody &S) const {
+  switch (Type) { // pick the code rewrite that matches the original TLS model
+  case R_386_TLS_GD: // GD -> IE if S can be preempted, otherwise GD -> LE
+    if (canBePreempted(&S, true))
+      relocateTlsGdToIe(Loc, BufEnd, P, SA);
+    else
+      relocateTlsGdToLe(Loc, BufEnd, P, SA);
+    // The next relocation should be the one against __tls_get_addr; return 1 to skip it.
+    return 1;
+  case R_386_TLS_GOTIE: // IE -> LE
+    relocateTlsIeToLe(Loc, BufEnd, P, SA);
+    return 0;
+  case R_386_TLS_LDM: // LD -> LE
+    relocateTlsLdToLe(Loc, BufEnd, P, SA);
+    // The next relocation should be the one against __tls_get_addr; return 1 to skip it.
+    return 1;
+  case R_386_TLS_LDO_32: // no code rewrite; the field is written as R_386_TLS_LE
+    relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
+    return 0;
+  }
+  llvm_unreachable("Unknown TLS optimization");
+}
+
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.1
+// IA-32 Linker Optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how GD can be optimized to IE:
+//   leal x@tlsgd(, %ebx, 1), %eax
+//   call __tls_get_addr@plt
+// Is converted to:
+//   movl %gs:0, %eax
+//   addl x@gotntpoff(%ebx), %eax
+void X86TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                                      uint64_t SA) const {
+  const uint8_t Inst[] = {
+      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+      0x03, 0x83, 0x00, 0x00, 0x00, 0x00  // addl 0(%ebx), %eax
+  };
+  memcpy(Loc - 3, Inst, sizeof(Inst)); // 12 bytes, starting 3 bytes before Loc
+  relocateOne(Loc + 5, BufEnd, R_386_32, P, // Loc + 5 == (Loc - 3) + 8: addl's disp32
+              SA - Out<ELF32LE>::Got->getVA() -
+                  Out<ELF32LE>::Got->getNumEntries() * 4); // SA minus (GOT VA + 4 * entries)
+}
+
+// GD can be optimized to LE:
+//   leal x@tlsgd(, %ebx, 1), %eax
+//   call __tls_get_addr@plt
+// Can be converted to:
+//   movl %gs:0,%eax
+//   addl $x@ntpoff,%eax
+// But gold emits subl $foo@tpoff,%eax instead of addl.
+// These instructions are completely equal in behavior.
+// This method generates subl to be consistent with gold.
+void X86TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                                      uint64_t SA) const {
+  const uint8_t Inst[] = {
+      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+      0x81, 0xe8, 0x00, 0x00, 0x00, 0x00  // subl $0, %eax
+  };
+  memcpy(Loc - 3, Inst, sizeof(Inst)); // 12 bytes, starting 3 bytes before Loc
+  relocateOne(Loc + 5, BufEnd, R_386_32, P, // Loc + 5 == (Loc - 3) + 8: subl's imm32
+              Out<ELF32LE>::TlsPhdr->p_memsz - SA);
+}
+
+// LD can be optimized to LE:
+//   leal foo@tlsldm(%reg),%eax
+//   call ___tls_get_addr@plt
+// Is converted to:
+//   movl %gs:0,%eax
+//   nop
+//   leal 0(%esi,1),%esi
+void X86TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                                      uint64_t SA) const { // only Loc is used below
+  const uint8_t Inst[] = {
+      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
+      0x90,                               // nop
+      0x8d, 0x74, 0x26, 0x00              // leal 0(%esi,1),%esi
+  };
+  memcpy(Loc - 2, Inst, sizeof(Inst)); // 11 bytes, starting 2 bytes before Loc
+}
+
+// In some conditions, R_386_TLS_GOTIE relocation can be optimized to
+// R_386_TLS_LE so that it does not use GOT.
+// This function does that. Read "ELF Handling For Thread-Local Storage,
+// 5.1  IA-32 Linker Optimizations" (http://www.akkadia.org/drepper/tls.pdf)
+// by Ulrich Drepper for details.
+void X86TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                                      uint64_t SA) const {
+  // Ulrich's document section 6.2 says that @gotntpoff can be
+  // used with MOVL or ADDL instructions.
+  // "MOVL foo@GOTNTPOFF(%EBX), %REG" is transformed to "MOVL $foo, %REG".
+  // "ADDL foo@GOTNTPOFF(%EBX), %REG" is transformed to "LEAL foo(%REG), %REG"
+  // Note: gold converts to ADDL instead of LEAL.
+  uint8_t *Inst = Loc - 2; // opcode byte of the instruction being rewritten
+  uint8_t *RegSlot = Loc - 1; // ModRM byte, immediately before the 32-bit field
+  uint8_t Reg = (Loc[-1] >> 3) & 7; // ModRM.reg field: the register operand
+  bool IsMov = *Inst == 0x8b; // 0x8b is the MOVL load; anything else is handled as ADDL
+  *Inst = IsMov ? 0xc7 : 0x8d; // 0xc7 = MOVL imm32, 0x8d = LEAL
+  if (IsMov)
+    *RegSlot = 0xc0 | ((*RegSlot >> 3) & 7); // mod=11 (register direct), r/m=register
+  else
+    *RegSlot = 0x80 | Reg | (Reg << 3); // mod=10 (disp32), reg and r/m both = Reg
+  relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA); // write the field as R_386_TLS_LE
+}
+
 X86_64TargetInfo::X86_64TargetInfo() {
   CopyReloc = R_X86_64_COPY;
   PCRelReloc = R_X86_64_PC32;

Added: lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s?rev=255103&view=auto
==============================================================================
--- lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s (added)
+++ lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s Wed Dec  9 03:55:54 2015
@@ -0,0 +1,20 @@
+.type tlsshared0,@object
+.section .tbss,"awT",@nobits
+.globl tlsshared0
+.align 4
+tlsshared0:
+ .long 0
+ .size tlsshared0, 4
+
+.type  tlsshared1,@object
+.globl tlsshared1
+.align 4
+tlsshared1:
+ .long 0
+ .size tlsshared1, 4
+
+.text
+ .globl __tls_get_addr
+ .align 16, 0x90
+ .type __tls_get_addr,@function
+__tls_get_addr:

Added: lld/trunk/test/ELF/tls-opt-gdiele-i686.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt-gdiele-i686.s?rev=255103&view=auto
==============================================================================
--- lld/trunk/test/ELF/tls-opt-gdiele-i686.s (added)
+++ lld/trunk/test/ELF/tls-opt-gdiele-i686.s Wed Dec  9 03:55:54 2015
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %p/Inputs/tls-opt-gdiele-i686.s -o %tso.o
+// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
+// RUN: ld.lld -shared %tso.o -o %tso
+// RUN: ld.lld %t.o %tso -o %tout
+// RUN: llvm-readobj -r %tout | FileCheck --check-prefix=NORELOC %s
+// RUN: llvm-objdump -d %tout | FileCheck --check-prefix=DISASM %s
+
+// NORELOC:      Relocations [
+// NORELOC-NEXT: Section ({{.*}}) .rel.dyn {
+// NORELOC-NEXT:   0x12050 R_386_TLS_TPOFF tlsshared0 0x0
+// NORELOC-NEXT:   0x12054 R_386_TLS_TPOFF tlsshared1 0x0
+// NORELOC-NEXT:   }
+// NORELOC-NEXT: ]
+
+// DISASM:      Disassembly of section .text:
+// DISASM-NEXT: _start:
+// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11006: 03 83 f8 ff ff ff addl -8(%ebx), %eax
+// DISASM-NEXT: 1100c: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11012: 03 83 fc ff ff ff addl -4(%ebx), %eax
+// DISASM-NEXT: 11018: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 1101e: 81 e8 08 00 00 00 subl $8, %eax
+// DISASM-NEXT: 11024: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 1102a: 81 e8 04 00 00 00 subl $4, %eax
+
+.type tlsexe1,@object
+.section .tbss,"awT",@nobits
+.globl tlsexe1
+.align 4
+tlsexe1:
+ .long 0
+ .size tlsexe1, 4
+
+.type tlsexe2,@object
+.section .tbss,"awT",@nobits
+.globl tlsexe2
+.align 4
+tlsexe2:
+ .long 0
+ .size tlsexe2, 4
+
+.section .text
+.globl ___tls_get_addr
+.type ___tls_get_addr,@function
+___tls_get_addr:
+
+.section .text
+.globl _start
+_start:
+//GD->IE (tlsshared0/1 are defined in the shared object)
+leal tlsshared0@tlsgd(,%ebx,1),%eax
+call ___tls_get_addr@plt
+leal tlsshared1@tlsgd(,%ebx,1),%eax
+call ___tls_get_addr@plt
+//GD->LE (tlsexe1/2 are defined in this file)
+leal tlsexe1@tlsgd(,%ebx,1),%eax
+call ___tls_get_addr@plt
+leal tlsexe2@tlsgd(,%ebx,1),%eax
+call ___tls_get_addr@plt

Added: lld/trunk/test/ELF/tls-opt-i686.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt-i686.s?rev=255103&view=auto
==============================================================================
--- lld/trunk/test/ELF/tls-opt-i686.s (added)
+++ lld/trunk/test/ELF/tls-opt-i686.s Wed Dec  9 03:55:54 2015
@@ -0,0 +1,69 @@
+// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
+// RUN: ld.lld %t.o -o %t1
+// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s
+// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
+
+// NORELOC:      Relocations [
+// NORELOC-NEXT: ]
+
+// DISASM:      Disassembly of section .text:
+// DISASM-NEXT: _start:
+// LD -> LE:
+// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11006: 90                nop
+// DISASM-NEXT: 11007: 8d 74 26 00       leal (%esi), %esi
+// DISASM-NEXT: 1100b: 8d 90 f8 ff ff ff leal -8(%eax), %edx
+// DISASM-NEXT: 11011: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11017: 90                nop
+// DISASM-NEXT: 11018: 8d 74 26 00       leal (%esi), %esi
+// DISASM-NEXT: 1101c: 8d 90 fc ff ff ff leal -4(%eax), %edx
+// IE -> LE:
+// 4294967288 == 0xFFFFFFF8
+// 4294967292 == 0xFFFFFFFC
+// DISASM-NEXT: 11022: 65 a1 00 00 00 00  movl %gs:0, %eax
+// DISASM-NEXT: 11028: c7 c0 f8 ff ff ff  movl $4294967288, %eax
+// DISASM-NEXT: 1102e: 65 a1 00 00 00 00  movl %gs:0, %eax
+// DISASM-NEXT: 11034: c7 c0 fc ff ff ff  movl $4294967292, %eax
+// DISASM-NEXT: 1103a: 65 a1 00 00 00 00  movl %gs:0, %eax
+// DISASM-NEXT: 11040: 8d 80 f8 ff ff ff  leal -8(%eax), %eax
+// DISASM-NEXT: 11046: 65 a1 00 00 00 00  movl %gs:0, %eax
+// DISASM-NEXT: 1104c: 8d 80 fc ff ff ff  leal -4(%eax), %eax
+.type tls0,@object
+.section .tbss,"awT",@nobits
+.globl tls0
+.align 4
+tls0:
+ .long 0
+ .size tls0, 4
+
+.type  tls1,@object
+.globl tls1
+.align 4
+tls1:
+ .long 0
+ .size tls1, 4
+
+.section .text
+.globl ___tls_get_addr
+.type ___tls_get_addr,@function
+___tls_get_addr:
+
+.section .text
+.globl _start
+_start:
+//LD -> LE:
+leal tls0@tlsldm(%ebx),%eax
+call ___tls_get_addr@plt
+leal tls0@dtpoff(%eax),%edx
+leal tls1@tlsldm(%ebx),%eax
+call ___tls_get_addr@plt
+leal tls1@dtpoff(%eax),%edx
+//IE -> LE:
+movl %gs:0,%eax
+movl tls0@gotntpoff(%ebx),%eax
+movl %gs:0,%eax
+movl tls1@gotntpoff(%ebx),%eax
+movl %gs:0,%eax
+addl tls0@gotntpoff(%ebx),%eax
+movl %gs:0,%eax
+addl tls1@gotntpoff(%ebx),%eax




More information about the llvm-commits mailing list