[lld] r253966 - [ELF2] - Optimization for R_X86_64_GOTTPOFF relocation.

George Rimar via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 24 01:00:06 PST 2015


Author: grimar
Date: Tue Nov 24 03:00:06 2015
New Revision: 253966

URL: http://llvm.org/viewvc/llvm-project?rev=253966&view=rev
Log:
[ELF2] - Optimization for R_X86_64_GOTTPOFF relocation.

R_X86_64_GOTTPOFF does not always require GOT entries. Some relocations can be converted to local ones.

Differential revision: http://reviews.llvm.org/D14713

Added:
    lld/trunk/test/ELF/tls-opt.s
Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/Target.cpp
    lld/trunk/ELF/Target.h

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=253966&r1=253965&r2=253966&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Tue Nov 24 03:00:06 2015
@@ -134,6 +134,12 @@ void InputSectionBase<ELFT>::relocate(
       continue;
     }
 
+    if (Target->isTlsOptimized(Type, Body)) {
+      Target->relocateTlsOptimize(BufLoc, BufEnd, AddrLoc,
+                                  getSymVA<ELFT>(Body));
+      continue;
+    }
+
     uintX_t SymVA = getSymVA<ELFT>(Body);
     if (Target->relocNeedsPlt(Type, Body)) {
       SymVA = Out<ELFT>::Plt->getEntryAddr(Body);

Modified: lld/trunk/ELF/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.cpp?rev=253966&r1=253965&r2=253966&view=diff
==============================================================================
--- lld/trunk/ELF/Target.cpp (original)
+++ lld/trunk/ELF/Target.cpp Tue Nov 24 03:00:06 2015
@@ -74,6 +74,9 @@ public:
   void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
                    uint64_t SA) const override;
   bool isRelRelative(uint32_t Type) const override;
+  bool isTlsOptimized(unsigned Type, const SymbolBody &S) const override;
+  void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                           uint64_t SA) const override;
 };
 
 class PPC64TargetInfo final : public TargetInfo {
@@ -148,6 +151,10 @@ TargetInfo *createTarget() {
 
 TargetInfo::~TargetInfo() {}
 
+bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody &S) const {
+  return false;
+}
+
 uint64_t TargetInfo::getVAStart() const { return Config->Shared ? 0 : VAStart; }
 
 bool TargetInfo::relocNeedsCopy(uint32_t Type, const SymbolBody &S) const {
@@ -162,6 +169,9 @@ bool TargetInfo::relocPointsToGot(uint32
 
 bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
 
+void TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                                     uint64_t SA) const {}
+
 void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
 
 void TargetInfo::writeGotPltHeaderEntries(uint8_t *Buf) const {}
@@ -279,6 +289,8 @@ bool X86_64TargetInfo::relocNeedsCopy(ui
 }
 
 bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
+  if (Type == R_X86_64_GOTTPOFF)
+    return !isTlsOptimized(Type, S);
   return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
          relocNeedsPlt(Type, S);
 }
@@ -344,6 +356,48 @@ bool X86_64TargetInfo::isRelRelative(uin
   }
 }
 
+bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
+                                      const SymbolBody &S) const {
+  if (Config->Shared || !S.isTLS())
+    return false;
+  return Type == R_X86_64_GOTTPOFF && !canBePreempted(&S, true);
+}
+
+// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
+// R_X86_64_TPOFF32 so that it does not use GOT.
+// This function does that. Read "ELF Handling For Thread-Local Storage,
+// 5.5 x86-64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
+// by Ulrich Drepper for details.
+void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+                                           uint64_t P, uint64_t SA) const {
+  // Ulrich's document section 6.5 says that @gottpoff(%rip) must be
+  // used in MOVQ or ADDQ instructions only.
+  // "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
+  // "ADDQ foo@GOTTPOFF(%RIP), %REG" is transformed to "LEAQ foo(%REG), %REG"
+  // (if the register is not RSP/R12) or "ADDQ $foo, %RSP".
+  // Opcodes info can be found at http://ref.x86asm.net/coder64.html#x48.
+  uint8_t *Prefix = Loc - 3;
+  uint8_t *Inst = Loc - 2;
+  uint8_t *RegSlot = Loc - 1;
+  uint8_t Reg = Loc[-1] >> 3;
+  bool IsMov = *Inst == 0x8b;
+  bool RspAdd = !IsMov && Reg == 4;
+  // r12 and rsp registers require special handling.
+  // Problem is that for other registers, for example leaq 0xXXXXXXXX(%r11),%r11
+  // the result is 7 bytes: 4d 8d 9b XX XX XX XX,
+  // but leaq 0xXXXXXXXX(%r12),%r12 is 8 bytes: 4d 8d a4 24 XX XX XX XX.
+  // The same is true for rsp. So we convert to addq for them, saving 1 byte
+  // that we don't have.
+  if (RspAdd)
+    *Inst = 0x81;
+  else
+    *Inst = IsMov ? 0xc7 : 0x8d;
+  if (*Prefix == 0x4c)
+    *Prefix = (IsMov || RspAdd) ? 0x49 : 0x4d;
+  *RegSlot = (IsMov || RspAdd) ? (0xc0 | Reg) : (0x80 | Reg | (Reg << 3));
+  relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
+}
+
 void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
                                    uint64_t P, uint64_t SA) const {
   switch (Type) {

Modified: lld/trunk/ELF/Target.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.h?rev=253966&r1=253965&r2=253966&view=diff
==============================================================================
--- lld/trunk/ELF/Target.h (original)
+++ lld/trunk/ELF/Target.h Tue Nov 24 03:00:06 2015
@@ -59,7 +59,9 @@ public:
   virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
   virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
                            uint64_t P, uint64_t SA) const = 0;
-
+  virtual bool isTlsOptimized(unsigned Type, const SymbolBody &S) const;
+  virtual void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+                                   uint64_t SA) const;
   virtual ~TargetInfo();
 
 protected:

Added: lld/trunk/test/ELF/tls-opt.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt.s?rev=253966&view=auto
==============================================================================
--- lld/trunk/test/ELF/tls-opt.s (added)
+++ lld/trunk/test/ELF/tls-opt.s Tue Nov 24 03:00:06 2015
@@ -0,0 +1,64 @@
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+// RUN: ld.lld %t.o -o %t1
+// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s
+// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
+
+// NORELOC:      Relocations [
+// NORELOC-NEXT: ]
+
+// DISASM:      Disassembly of section .text:
+// DISASM-NEXT: _start:
+// DISASM-NEXT: 11000: 48 c7 c0 f8 ff ff ff movq $-8, %rax
+// DISASM-NEXT: 11007: 49 c7 c7 f8 ff ff ff movq $-8, %r15
+// DISASM-NEXT: 1100e: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
+// DISASM-NEXT: 11015: 4d 8d bf f8 ff ff ff leaq -8(%r15), %r15
+// DISASM-NEXT: 1101c: 48 81 c4 f8 ff ff ff addq $-8, %rsp
+// DISASM-NEXT: 11023: 49 81 c4 f8 ff ff ff addq $-8, %r12
+// DISASM-NEXT: 1102a: 48 c7 c0 fc ff ff ff movq $-4, %rax
+// DISASM-NEXT: 11031: 49 c7 c7 fc ff ff ff movq $-4, %r15
+// DISASM-NEXT: 11038: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax
+// DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
+// DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
+// DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
+
+// Corrupted output:
+// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
+// DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
+// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
+// DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
+
+.type tls0, at object
+.section .tbss,"awT", at nobits
+.globl tls0
+.align 4
+tls0:
+ .long 0
+ .size tls0, 4
+
+.type  tls1, at object
+.globl tls1
+.align 4
+tls1:
+ .long 0
+ .size tls1, 4
+
+.section .text
+.globl _start
+_start:
+ movq tls0 at GOTTPOFF(%rip), %rax
+ movq tls0 at GOTTPOFF(%rip), %r15
+ addq tls0 at GOTTPOFF(%rip), %rax
+ addq tls0 at GOTTPOFF(%rip), %r15
+ addq tls0 at GOTTPOFF(%rip), %rsp
+ addq tls0 at GOTTPOFF(%rip), %r12
+ movq tls1 at GOTTPOFF(%rip), %rax
+ movq tls1 at GOTTPOFF(%rip), %r15
+ addq tls1 at GOTTPOFF(%rip), %rax
+ addq tls1 at GOTTPOFF(%rip), %r15
+ addq tls1 at GOTTPOFF(%rip), %rsp
+ addq tls1 at GOTTPOFF(%rip), %r12
+
+ //Invalid input case:
+ xchgq tls0 at gottpoff(%rip),%rax
+ shlq tls0 at gottpoff
+ rolq tls0 at gottpoff




More information about the llvm-commits mailing list