[lld] r254713 - [ELF] - Implemented @tlsgd optimization (GD->IE case, x64).

George Rimar via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 4 03:20:15 PST 2015


Author: grimar
Date: Fri Dec  4 05:20:13 2015
New Revision: 254713

URL: http://llvm.org/viewvc/llvm-project?rev=254713&view=rev
Log:
[ELF] - Implemented @tlsgd optimization (GD->IE case, x64).

"Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5 x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows how GD can be optimized to IE.
This patch implements the optimization.

Differential revision: http://reviews.llvm.org/D15000

Added:
    lld/trunk/test/ELF/Inputs/tls-opt-gdie.s
    lld/trunk/test/ELF/tls-opt-gdie.s
Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/OutputSections.cpp
    lld/trunk/ELF/Target.cpp
    lld/trunk/ELF/Target.h
    lld/trunk/ELF/Writer.cpp

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=254713&r1=254712&r2=254713&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Fri Dec  4 05:20:13 2015
@@ -169,11 +169,14 @@ void InputSectionBase<ELFT>::relocate(ui
     }
 
     if (Target->isTlsOptimized(Type, &Body)) {
+      uintX_t SymVA = Target->relocNeedsGot(Type, Body)
+                          ? Out<ELFT>::Got->getEntryAddr(Body)
+                          : getSymVA<ELFT>(Body);
       // By optimizing TLS relocations, it is sometimes needed to skip
       // relocations that immediately follow TLS relocations. This function
       // knows how many slots we need to skip.
-      I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc,
-                                       getSymVA<ELFT>(Body));
+      I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc, SymVA,
+                                       Body);
       continue;
     }
 

Modified: lld/trunk/ELF/OutputSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=254713&r1=254712&r2=254713&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.cpp (original)
+++ lld/trunk/ELF/OutputSections.cpp Fri Dec  4 05:20:13 2015
@@ -217,16 +217,23 @@ bool RelocationSection<ELFT>::applyTlsDy
     return true;
   }
 
-  if (Body && Target->isTlsGlobalDynamicReloc(Type)) {
+  if (!Body || !Target->isTlsGlobalDynamicReloc(Type))
+    return false;
+
+  if (Target->isTlsOptimized(Type, Body)) {
     P->setSymbolAndType(Body->getDynamicSymbolTableIndex(),
-                        Target->getTlsModuleIndexReloc(), Config->Mips64EL);
-    P->r_offset = Out<ELFT>::Got->getGlobalDynAddr(*Body);
-    N->setSymbolAndType(Body->getDynamicSymbolTableIndex(),
-                        Target->getTlsOffsetReloc(), Config->Mips64EL);
-    N->r_offset = Out<ELFT>::Got->getGlobalDynAddr(*Body) + sizeof(uintX_t);
+                        Target->getTlsGotReloc(), Config->Mips64EL);
+    P->r_offset = Out<ELFT>::Got->getEntryAddr(*Body);
     return true;
   }
-  return false;
+
+  P->setSymbolAndType(Body->getDynamicSymbolTableIndex(),
+                      Target->getTlsModuleIndexReloc(), Config->Mips64EL);
+  P->r_offset = Out<ELFT>::Got->getGlobalDynAddr(*Body);
+  N->setSymbolAndType(Body->getDynamicSymbolTableIndex(),
+                      Target->getTlsOffsetReloc(), Config->Mips64EL);
+  N->r_offset = Out<ELFT>::Got->getGlobalDynAddr(*Body) + sizeof(uintX_t);
+  return true;
 }
 
 template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) {

Modified: lld/trunk/ELF/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.cpp?rev=254713&r1=254712&r2=254713&view=diff
==============================================================================
--- lld/trunk/ELF/Target.cpp (original)
+++ lld/trunk/ELF/Target.cpp Fri Dec  4 05:20:13 2015
@@ -110,13 +110,16 @@ public:
   bool isRelRelative(uint32_t Type) const override;
   bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
   unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
-                               uint64_t P, uint64_t SA) const override;
+                               uint64_t P, uint64_t SA,
+                               const SymbolBody &S) const override;
 
 private:
   void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                          uint64_t SA) const;
   void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                          uint64_t SA) const;
+  void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
   void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
                          uint64_t SA) const;
 };
@@ -211,8 +214,8 @@ unsigned TargetInfo::getPltRefReloc(unsi
 bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
 
 unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
-                                         uint32_t Type, uint64_t P,
-                                         uint64_t SA) const {
+                                         uint32_t Type, uint64_t P, uint64_t SA,
+                                         const SymbolBody &S) const {
   return 0;
 }
 
@@ -417,14 +420,15 @@ bool X86_64TargetInfo::relocNeedsCopy(ui
 }
 
 bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
+  if (Type == R_X86_64_TLSGD)
+    return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true);
   if (Type == R_X86_64_GOTTPOFF)
     return !isTlsOptimized(Type, &S);
-  return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
-         relocNeedsPlt(Type, S);
+  return Type == R_X86_64_GOTPCREL || relocNeedsPlt(Type, S);
 }
 
 bool X86_64TargetInfo::isTlsDynReloc(unsigned Type) const {
-  return Type == R_X86_64_GOTTPOFF;
+  return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_TLSGD;
 }
 
 unsigned X86_64TargetInfo::getPltRefReloc(unsigned Type) const {
@@ -492,8 +496,8 @@ bool X86_64TargetInfo::isTlsOptimized(un
                                       const SymbolBody *S) const {
   if (Config->Shared || (S && !S->isTLS()))
     return false;
-  return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 ||
-         (Type == R_X86_64_TLSGD && !canBePreempted(S, true)) ||
+  return Type == R_X86_64_TLSGD || Type == R_X86_64_TLSLD ||
+         Type == R_X86_64_DTPOFF32 ||
          (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true));
 }
 
@@ -539,6 +543,27 @@ void X86_64TargetInfo::relocateTlsGdToLe
   relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA);
 }
 
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
+// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how GD can be optimized to IE:
+//  .byte 0x66
+//  leaq x@tlsgd(%rip), %rdi
+//  .word 0x6666
+//  rex64
+//  call __tls_get_addr@plt
+// Is converted to:
+//  mov %fs:0x0,%rax
+//  addq x@tpoff,%rax
+void X86_64TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd,
+                                         uint64_t P, uint64_t SA) const {
+  const uint8_t Inst[] = {
+      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
+      0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00              // addq x@tpoff,%rax
+  };
+  memcpy(Loc - 4, Inst, sizeof(Inst));
+  relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF64, P + 12, SA);
+}
+
 // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
 // R_X86_64_TPOFF32 so that R_X86_64_TPOFF32 so that it does not use GOT.
 // This function does that. Read "ELF Handling For Thread-Local Storage,
@@ -581,7 +606,8 @@ void X86_64TargetInfo::relocateTlsIeToLe
 // This function returns a number of relocations that need to be skipped.
 unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
                                                uint32_t Type, uint64_t P,
-                                               uint64_t SA) const {
+                                               uint64_t SA,
+                                               const SymbolBody &S) const {
   switch (Type) {
   case R_X86_64_DTPOFF32:
     relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
@@ -589,10 +615,14 @@ unsigned X86_64TargetInfo::relocateTlsOp
   case R_X86_64_GOTTPOFF:
     relocateTlsIeToLe(Loc, BufEnd, P, SA);
     return 0;
-  case R_X86_64_TLSGD:
-    relocateTlsGdToLe(Loc, BufEnd, P, SA);
+  case R_X86_64_TLSGD: {
+    if (canBePreempted(&S, true))
+      relocateTlsGdToIe(Loc, BufEnd, P, SA);
+    else
+      relocateTlsGdToLe(Loc, BufEnd, P, SA);
     // The next relocation should be against __tls_get_addr, so skip it
     return 1;
+  }
   case R_X86_64_TLSLD:
     relocateTlsLdToLe(Loc, BufEnd, P, SA);
     // The next relocation should be against __tls_get_addr, so skip it

Modified: lld/trunk/ELF/Target.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.h?rev=254713&r1=254712&r2=254713&view=diff
==============================================================================
--- lld/trunk/ELF/Target.h (original)
+++ lld/trunk/ELF/Target.h Fri Dec  4 05:20:13 2015
@@ -61,8 +61,8 @@ public:
                            uint8_t *PairedLoc = nullptr) const = 0;
   virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
   virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
-                                       uint32_t Type, uint64_t P,
-                                       uint64_t SA) const;
+                                       uint32_t Type, uint64_t P, uint64_t SA,
+                                       const SymbolBody &S) const;
   virtual ~TargetInfo();
 
 protected:

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=254713&r1=254712&r2=254713&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Fri Dec  4 05:20:13 2015
@@ -219,14 +219,15 @@ void Writer<ELFT>::scanRelocs(
       Body = Body->repl();
 
     if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
-      if (Target->isTlsOptimized(Type, Body))
-        continue;
-      if (Out<ELFT>::Got->addDynTlsEntry(Body)) {
+      bool Opt = Target->isTlsOptimized(Type, Body);
+      if (!Opt && Out<ELFT>::Got->addDynTlsEntry(Body)) {
         Out<ELFT>::RelaDyn->addReloc({&C, &RI});
         Out<ELFT>::RelaDyn->addReloc({nullptr, nullptr});
         Body->setUsedInDynamicReloc();
+        continue;
       }
-      continue;
+      if (!canBePreempted(Body, true))
+        continue;
     }
 
     if (Body && Body->isTLS() && !Target->isTlsDynReloc(Type))

Added: lld/trunk/test/ELF/Inputs/tls-opt-gdie.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/Inputs/tls-opt-gdie.s?rev=254713&view=auto
==============================================================================
--- lld/trunk/test/ELF/Inputs/tls-opt-gdie.s (added)
+++ lld/trunk/test/ELF/Inputs/tls-opt-gdie.s Fri Dec  4 05:20:13 2015
@@ -0,0 +1,20 @@
+.type tlsshared0,@object
+.section .tbss,"awT",@nobits
+.globl tlsshared0
+.align 4
+tlsshared0:
+ .long 0
+ .size tlsshared0, 4
+
+.type  tlsshared1,@object
+.globl tlsshared1
+.align 4
+tlsshared1:
+ .long 0
+ .size tlsshared1, 4
+
+.text
+.globl __tls_get_addr
+.align 16, 0x90
+.type __tls_get_addr,@function
+__tls_get_addr:

Added: lld/trunk/test/ELF/tls-opt-gdie.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt-gdie.s?rev=254713&view=auto
==============================================================================
--- lld/trunk/test/ELF/tls-opt-gdie.s (added)
+++ lld/trunk/test/ELF/tls-opt-gdie.s Fri Dec  4 05:20:13 2015
@@ -0,0 +1,55 @@
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/tls-opt-gdie.s -o %tso.o
+// RUN: ld.lld -shared %tso.o -o %t.so
+// RUN: ld.lld %t.o %t.so -o %t1
+// RUN: llvm-readobj -s -r %t1 | FileCheck --check-prefix=RELOC %s
+// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
+
+//RELOC:      Section {
+//RELOC:      Index: 9
+//RELOC-NEXT: Name: .got
+//RELOC-NEXT: Type: SHT_PROGBITS
+//RELOC-NEXT: Flags [
+//RELOC-NEXT:   SHF_ALLOC
+//RELOC-NEXT:   SHF_WRITE
+//RELOC-NEXT: ]
+//RELOC-NEXT: Address: 0x120E0
+//RELOC-NEXT: Offset: 0x20E0
+//RELOC-NEXT: Size: 16
+//RELOC-NEXT: Link: 0
+//RELOC-NEXT: Info: 0
+//RELOC-NEXT: AddressAlignment: 8
+//RELOC-NEXT: EntrySize: 0
+//RELOC-NEXT: }
+//RELOC:      Relocations [
+//RELOC-NEXT:   Section (4) .rela.dyn {
+//RELOC-NEXT:     0x120E0 R_X86_64_TPOFF64 tlsshared0 0x0
+//RELOC-NEXT:     0x120E8 R_X86_64_TPOFF64 tlsshared1 0x0
+//RELOC-NEXT:   }
+//RELOC-NEXT:   Section (5) .rela.plt {
+//RELOC-NEXT:     0x12108 R_X86_64_JUMP_SLOT __tls_get_addr 0x0
+//RELOC-NEXT:   }
+//RELOC-NEXT: ]
+
+//0x11009 + (4304 + 7) = 0x120E0
+//0x11019 + (4296 + 7) = 0x120E8
+// DISASM:      Disassembly of section .text:
+// DISASM-NEXT: _start:
+// DISASM-NEXT: 11000: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 11009: 48 03 05 d0 10 00 00       addq 4304(%rip), %rax
+// DISASM-NEXT: 11010: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 11019: 48 03 05 c8 10 00 00       addq 4296(%rip), %rax
+
+.section .text
+.globl _start
+_start:
+ .byte 0x66
+ leaq tlsshared0@tlsgd(%rip),%rdi
+ .word 0x6666
+ rex64
+ call __tls_get_addr@plt
+ .byte 0x66
+ leaq tlsshared1@tlsgd(%rip),%rdi
+ .word 0x6666
+ rex64
+ call __tls_get_addr@plt




More information about the llvm-commits mailing list