[lld] r254101 - [ELF] - Implemented optimizations for @tlsld and @tlsgd

George Rimar via llvm-commits llvm-commits@lists.llvm.org
Wed Nov 25 13:46:05 PST 2015


Author: grimar
Date: Wed Nov 25 15:46:05 2015
New Revision: 254101

URL: http://llvm.org/viewvc/llvm-project?rev=254101&view=rev
Log:
[ELF] - Implemented optimizations for @tlsld and @tlsgd

Implements the @tlsld (LD to LE) and @tlsgd (GD to LE) optimizations.
The patch does not implement the GD->IE case for @tlsgd.

Differential revision: http://reviews.llvm.org/D14870
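
Background for the log message: general-dynamic (GD) and local-dynamic (LD)
accesses obtain the variable's address by calling __tls_get_addr, while
local-exec (LE) addresses it directly off the thread pointer in %fs. The
linker may only rewrite GD/LD sequences to LE when it already knows that
offset, i.e. when it is producing an executable and, for GD, the symbol
cannot be preempted. A minimal sketch of that precondition, with illustrative
names (the real check is X86_64TargetInfo::isTlsOptimized in the diff below):

  // Illustrative only; see X86_64TargetInfo::isTlsOptimized for the real logic.
  static bool canRelaxTlsToLe(bool ProducingSharedObject, bool Preemptible) {
    // In a shared object the TP-relative offset is unknown until load time,
    // and a preemptible symbol may end up being defined in another module.
    return !ProducingSharedObject && !Preemptible;
  }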

Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/Target.cpp
    lld/trunk/ELF/Target.h
    lld/trunk/ELF/Writer.cpp
    lld/trunk/test/ELF/tls-opt.s

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Wed Nov 25 15:46:05 2015
@@ -98,7 +98,9 @@ void InputSectionBase<ELFT>::relocate(
     uint8_t *Buf, uint8_t *BufEnd,
     iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) {
   typedef Elf_Rel_Impl<ELFT, isRela> RelType;
-  for (const RelType &RI : Rels) {
+  size_t Num = Rels.end() - Rels.begin();
+  for (size_t I = 0; I < Num; ++I) {
+    const RelType &RI = *(Rels.begin() + I);
     uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
     uint32_t Type = RI.getType(Config->Mips64EL);
     uintX_t Offset = getOffset(RI.r_offset);
@@ -108,7 +110,8 @@ void InputSectionBase<ELFT>::relocate(
     uint8_t *BufLoc = Buf + Offset;
     uintX_t AddrLoc = OutSec->getVA() + Offset;
 
-    if (Target->isTlsLocalDynamicReloc(Type)) {
+    if (Target->isTlsLocalDynamicReloc(Type) &&
+        !Target->isTlsOptimized(Type, nullptr)) {
       Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
                           Out<ELFT>::Got->getVA() +
                               Out<ELFT>::LocalModuleTlsIndexOffset +
@@ -127,16 +130,20 @@ void InputSectionBase<ELFT>::relocate(
 
     SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl();
 
-    if (Target->isTlsGlobalDynamicReloc(Type)) {
+    if (Target->isTlsGlobalDynamicReloc(Type) &&
+        !Target->isTlsOptimized(Type, &Body)) {
       Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
                           Out<ELFT>::Got->getEntryAddr(Body) +
                               getAddend<ELFT>(RI));
       continue;
     }
 
-    if (Target->isTlsOptimized(Type, Body)) {
-      Target->relocateTlsOptimize(BufLoc, BufEnd, AddrLoc,
-                                  getSymVA<ELFT>(Body));
+    if (Target->isTlsOptimized(Type, &Body)) {
+      // When a TLS relocation is optimized, we sometimes also need to skip
+      // relocations that immediately follow it. This function returns the
+      // number of slots to skip.
+      I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc,
+                                       getSymVA<ELFT>(Body));
       continue;
     }
 

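The loop change above is the heart of the mechanism on the apply side: a TLS
rewrite can also cover the relocation that immediately follows it (the one
against __tls_get_addr), so the range-for becomes an index-based loop that can
advance by a variable amount. A self-contained sketch of that pattern, using
stand-in types rather than lld's:

  #include <cstddef>
  #include <vector>

  struct Rel { unsigned Type; }; // stand-in for Elf_Rel_Impl

  // Mirrors the new relocateTlsOptimize() contract: return how many *extra*
  // relocations the rewrite consumed. The GD->LE and LD->LE rewrites also
  // overwrite the following call to __tls_get_addr, so its relocation must
  // not be applied afterwards.
  static std::size_t applyOne(const Rel &R) {
    const unsigned R_X86_64_TLSGD = 19, R_X86_64_TLSLD = 20;
    return (R.Type == R_X86_64_TLSGD || R.Type == R_X86_64_TLSLD) ? 1 : 0;
  }

  static void applyAll(const std::vector<Rel> &Rels) {
    for (std::size_t I = 0, E = Rels.size(); I != E; ++I)
      I += applyOne(Rels[I]); // skip slots already covered by a rewrite
  }
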
Modified: lld/trunk/ELF/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.cpp?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/Target.cpp (original)
+++ lld/trunk/ELF/Target.cpp Wed Nov 25 15:46:05 2015
@@ -80,9 +80,17 @@ public:
   void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
                    uint64_t SA) const override;
   bool isRelRelative(uint32_t Type) const override;
-  bool isTlsOptimized(unsigned Type, const SymbolBody &S) const override;
-  void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
-                           uint64_t SA) const override;
+  bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
+  unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
+                               uint64_t P, uint64_t SA) const override;
+
+private:
+  void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
+  void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
+  void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                         uint64_t SA) const;
 };
 
 class PPC64TargetInfo final : public TargetInfo {
@@ -161,7 +169,7 @@ TargetInfo *createTarget() {
 
 TargetInfo::~TargetInfo() {}
 
-bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody &S) const {
+bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
   return false;
 }
 
@@ -177,8 +185,11 @@ unsigned TargetInfo::getPltRefReloc(unsi
 
 bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
 
-void TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
-                                     uint64_t SA) const {}
+unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+                                         uint32_t Type, uint64_t P,
+                                         uint64_t SA) const {
+  return 0;
+}
 
 void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
 
@@ -364,7 +375,7 @@ bool X86_64TargetInfo::relocNeedsCopy(ui
 
 bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
   if (Type == R_X86_64_GOTTPOFF)
-    return !isTlsOptimized(Type, S);
+    return !isTlsOptimized(Type, &S);
   return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
          relocNeedsPlt(Type, S);
 }
@@ -435,10 +446,54 @@ bool X86_64TargetInfo::isRelRelative(uin
 }
 
 bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
-                                      const SymbolBody &S) const {
-  if (Config->Shared || !S.isTLS())
+                                      const SymbolBody *S) const {
+  if (Config->Shared || (S && !S->isTLS()))
     return false;
-  return Type == R_X86_64_GOTTPOFF && !canBePreempted(&S, true);
+  return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 ||
+         (Type == R_X86_64_TLSGD && !canBePreempted(S, true)) ||
+         (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true));
+}
+
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
+// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how LD can be optimized to LE:
+//   leaq bar@tlsld(%rip), %rdi
+//   callq __tls_get_addr@PLT
+//   leaq bar@dtpoff(%rax), %rcx
+// Is converted to:
+//  .word 0x6666
+//  .byte 0x66
+//  mov %fs:0,%rax
+//  leaq bar@tpoff(%rax), %rcx
+void X86_64TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd,
+                                         uint64_t P, uint64_t SA) const {
+  const uint8_t Inst[] = {
+      0x66, 0x66,                                          //.word 0x6666
+      0x66,                                                //.byte 0x66
+      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
+  };
+  memcpy(Loc - 3, Inst, sizeof(Inst));
+}
+
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
+// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how GD can be optimized to LE:
+//  .byte 0x66
+//  leaq x@tlsgd(%rip), %rdi
+//  .word 0x6666
+//  rex64
+//  call __tls_get_addr@plt
+// Is converted to:
+//  mov %fs:0x0,%rax
+//  lea x@tpoff,%rax
+void X86_64TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd,
+                                         uint64_t P, uint64_t SA) const {
+  const uint8_t Inst[] = {
+      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
+      0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00              // lea x@tpoff,%rax
+  };
+  memcpy(Loc - 4, Inst, sizeof(Inst));
+  relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA);
 }
 
 // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
@@ -446,8 +501,8 @@ bool X86_64TargetInfo::isTlsOptimized(un
 // This function does that. Read "ELF Handling For Thread-Local Storage,
 // 5.5 x86-x64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
 // by Ulrich Drepper for details.
-void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
-                                           uint64_t P, uint64_t SA) const {
+void X86_64TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd,
+                                         uint64_t P, uint64_t SA) const {
   // Ulrich's document section 6.5 says that @gottpoff(%rip) must be
   // used in MOVQ or ADDQ instructions only.
   // "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
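
Before the dispatcher in the next hunk, a note on the byte accounting behind
the Loc - 3 and Loc - 4 adjustments in relocateTlsLdToLe and relocateTlsGdToLe
above. The counts follow the standard encodings quoted in Drepper's document;
the static_asserts below are an annotation, not code from the patch:

  #include <cstdint>

  // LD: "leaq x@tlsld(%rip),%rdi" is 7 bytes with the relocation on its last
  // 4 bytes, followed by the 5-byte "callq __tls_get_addr@PLT" -- 12 bytes in
  // total, so the 12-byte replacement starts 3 bytes before the relocation.
  static const uint8_t LdToLe[] = {0x66, 0x66, 0x66,
                                   0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0};
  static_assert(sizeof(LdToLe) == 12, "LD->LE rewrite covers 12 bytes");

  // GD: ".byte 0x66; leaq x@tlsgd(%rip),%rdi" is 8 bytes with the relocation
  // on its last 4 bytes, followed by ".word 0x6666; rex64; call
  // __tls_get_addr@plt" (8 bytes) -- 16 bytes in total. The 16-byte
  // replacement starts 4 bytes before the relocation, and its final 4 bytes
  // (the lea displacement) are filled by the R_X86_64_TPOFF32 applied at
  // Loc + 8.
  static const uint8_t GdToLe[] = {0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0,
                                   0x48, 0x8d, 0x80, 0, 0, 0, 0};
  static_assert(sizeof(GdToLe) == 16, "GD->LE rewrite covers 16 bytes");
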
@@ -476,6 +531,33 @@ void X86_64TargetInfo::relocateTlsOptimi
   relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
 }
 
+// This function applies a TLS relocation with an optimization as described
+// in Ulrich Drepper's document. Because instructions at the relocation target
+// are rewritten, relocations that immediately follow the TLS relocation (and
+// would otherwise apply to the rewritten instructions) may have to be skipped.
+// This function returns the number of relocations that need to be skipped.
+unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+                                               uint32_t Type, uint64_t P,
+                                               uint64_t SA) const {
+  switch (Type) {
+  case R_X86_64_GOTTPOFF:
+    relocateTlsIeToLe(Loc, BufEnd, P, SA);
+    return 0;
+  case R_X86_64_TLSLD:
+    relocateTlsLdToLe(Loc, BufEnd, P, SA);
+    // The next relocation should be against __tls_get_addr, so skip it
+    return 1;
+  case R_X86_64_TLSGD:
+    relocateTlsGdToLe(Loc, BufEnd, P, SA);
+    // The next relocation should be against __tls_get_addr, so skip it
+    return 1;
+  case R_X86_64_DTPOFF32:
+    relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
+    return 0;
+  }
+  llvm_unreachable("Unknown TLS optimization");
+}
+
 void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
                                    uint64_t P, uint64_t SA) const {
   switch (Type) {

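One case in the new dispatcher deserves a word: R_X86_64_DTPOFF32 is re-applied
as R_X86_64_TPOFF32 because, once the surrounding LD sequence has been rewritten
to LE, the offset is no longer relative to the module's TLS block returned by
__tls_get_addr but relative to the thread pointer itself. A self-contained
illustration of the two offsets (not lld code; on x86-64 the static TLS block
sits just below the thread pointer, which is why the test below expects -8 and
-4):

  #include <cstdint>

  // What @dtpoff denotes in the LD model: the variable's offset within its
  // module's TLS block.
  uint64_t dtpoff(uint64_t SymVA, uint64_t TlsBlockVA) {
    return SymVA - TlsBlockVA;
  }

  // What @tpoff denotes in the LE model: the (negative) offset from the
  // thread pointer, which points just past the static TLS block. With two
  // 4-byte TLS variables this yields -8 and -4, matching the leaq operands
  // in the updated test.
  int64_t tpoff(uint64_t SymVA, uint64_t TlsBlockEndVA) {
    return static_cast<int64_t>(SymVA - TlsBlockEndVA);
  }
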
Modified: lld/trunk/ELF/Target.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.h?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/Target.h (original)
+++ lld/trunk/ELF/Target.h Wed Nov 25 15:46:05 2015
@@ -59,9 +59,10 @@ public:
   virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
   virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
                            uint64_t P, uint64_t SA) const = 0;
-  virtual bool isTlsOptimized(unsigned Type, const SymbolBody &S) const;
-  virtual void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
-                                   uint64_t SA) const;
+  virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
+  virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+                                       uint32_t Type, uint64_t P,
+                                       uint64_t SA) const;
   virtual ~TargetInfo();
 
 protected:

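The signature change from "const SymbolBody &S" to "const SymbolBody *S" is
deliberate: a local-dynamic relocation describes the module rather than a
particular symbol, so Writer.cpp below queries isTlsOptimized with nullptr for
R_X86_64_TLSLD. A stand-alone analogue of a null-tolerant check, with
simplified types rather than lld's:

  struct Symbol { bool IsTls; }; // stand-in for SymbolBody

  static bool mayRelaxTls(unsigned Type, const Symbol *S, bool SharedOutput) {
    // S may legitimately be null for module-level (local-dynamic) relocations.
    if (SharedOutput || (S && !S->IsTls))
      return false;
    (void)Type; // per-type checks elided; see X86_64TargetInfo::isTlsOptimized
    return true;
  }
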
Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Wed Nov 25 15:46:05 2015
@@ -203,6 +203,8 @@ void Writer<ELFT>::scanRelocs(
     uint32_t Type = RI.getType(Config->Mips64EL);
 
     if (Target->isTlsLocalDynamicReloc(Type)) {
+      if (Target->isTlsOptimized(Type, nullptr))
+        continue;
       if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) {
         Out<ELFT>::LocalModuleTlsIndexOffset =
             Out<ELFT>::Got->addLocalModuleTlsIndex();
@@ -220,6 +222,8 @@ void Writer<ELFT>::scanRelocs(
       Body = Body->repl();
 
     if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
+      if (Target->isTlsOptimized(Type, Body))
+        continue;
       if (Body->isInGot())
         continue;
       Out<ELFT>::Got->addDynTlsEntry(Body);

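The Writer change completes the picture on the scan side: scanRelocs decides
which relocations need GOT slots or dynamic relocations, and a TLS relocation
that will be relaxed to LE at apply time needs neither, so it is skipped here.
A simplified sketch of that split between scanning and applying, with stand-in
types rather than lld's:

  #include <cstdint>
  #include <vector>

  struct Reloc { uint32_t Type; bool WillRelaxToLe; };

  // Scan pass: allocate TLS GOT entries only for relocations that will stay
  // dynamic; relaxed ones are handled entirely by rewriting instructions.
  static void scanTlsRelocs(const std::vector<Reloc> &Rels,
                            std::vector<uint32_t> &TlsGotEntries) {
    for (const Reloc &R : Rels) {
      if (R.WillRelaxToLe)
        continue; // no GOT entry, no dynamic relocation needed
      TlsGotEntries.push_back(R.Type); // stand-in for Got->addDynTlsEntry()
    }
  }
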
Modified: lld/trunk/test/ELF/tls-opt.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt.s?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/test/ELF/tls-opt.s (original)
+++ lld/trunk/test/ELF/tls-opt.s Wed Nov 25 15:46:05 2015
@@ -20,12 +20,21 @@
 // DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
 // DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
 // DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
-
 // Corrupted output:
 // DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
 // DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
 // DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
 // DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
+// LD to LE:
+// DISASM-NEXT: 1106b: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 11077: 48 8d 88 f8 ff ff ff                leaq -8(%rax), %rcx
+// DISASM-NEXT: 1107e: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 1108a: 48 8d 88 fc ff ff ff                leaq -4(%rax), %rcx
+// GD to LE:
+// DISASM-NEXT: 11091: 64 48 8b 04 25 00 00 00 00          movq %fs:0, %rax
+// DISASM-NEXT: 1109a: 48 8d 80 f8 ff ff ff                leaq -8(%rax), %rax
+// DISASM-NEXT: 110a1: 64 48 8b 04 25 00 00 00 00          movq %fs:0, %rax
+// DISASM-NEXT: 110aa: 48 8d 80 fc ff ff ff                leaq -4(%rax), %rax
 
 .type tls0,@object
 .section .tbss,"awT",@nobits
@@ -62,3 +71,23 @@ _start:
 xchgq tls0@gottpoff(%rip),%rax
 shlq tls0@gottpoff
 rolq tls0@gottpoff
+
+ // LD to LE:
+ leaq tls0@tlsld(%rip), %rdi
+ callq __tls_get_addr@PLT
+ leaq tls0@dtpoff(%rax),%rcx
+ leaq tls1@tlsld(%rip), %rdi
+ callq __tls_get_addr@PLT
+ leaq tls1@dtpoff(%rax),%rcx
+
+ // GD to LE:
+ .byte 0x66
+ leaq tls0@tlsgd(%rip),%rdi
+ .word 0x6666
+ rex64
+ call __tls_get_addr@plt
+ .byte 0x66
+ leaq tls1@tlsgd(%rip),%rdi
+ .word 0x6666
+ rex64
+ call __tls_get_addr@plt

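For readers who want to reproduce such sequences from C or C++ rather than
hand-written assembly, code along the following lines, compiled with -fPIC,
typically produces the relocations the test exercises (an illustration, not
part of the test):

  // With -fPIC, an internal-linkage TLS variable is usually accessed via the
  // local-dynamic model (@tlsld/@dtpoff); a default-visibility external one
  // via general-dynamic (@tlsgd), since it may be preempted. When the objects
  // are linked into an executable, this patch lets lld relax both to local-exec.
  static __thread int tls_local = 1;
  __thread int tls_global = 2;

  int sum() { return tls_local + tls_global; }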


