[lld] r254101 - [ELF] - Implemented optimizations for @tlsld and @tlsgd
George Rimar via llvm-commits
llvm-commits@lists.llvm.org
Wed Nov 25 13:46:05 PST 2015
Author: grimar
Date: Wed Nov 25 15:46:05 2015
New Revision: 254101
URL: http://llvm.org/viewvc/llvm-project?rev=254101&view=rev
Log:
[ELF] - Implemented optimizations for @tlsld and @tlsgd
Implements @tlsld (LD to LE) and @tlsgd (GD to LE) optimizations.
Patch does not implement the GD->IE case for @tlsgd.
Differential revision: http://reviews.llvm.org/D14870
Modified:
lld/trunk/ELF/InputSection.cpp
lld/trunk/ELF/Target.cpp
lld/trunk/ELF/Target.h
lld/trunk/ELF/Writer.cpp
lld/trunk/test/ELF/tls-opt.s
Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Wed Nov 25 15:46:05 2015
@@ -98,7 +98,9 @@ void InputSectionBase<ELFT>::relocate(
uint8_t *Buf, uint8_t *BufEnd,
iterator_range<const Elf_Rel_Impl<ELFT, isRela> *> Rels) {
typedef Elf_Rel_Impl<ELFT, isRela> RelType;
- for (const RelType &RI : Rels) {
+ size_t Num = Rels.end() - Rels.begin();
+ for (size_t I = 0; I < Num; ++I) {
+ const RelType &RI = *(Rels.begin() + I);
uint32_t SymIndex = RI.getSymbol(Config->Mips64EL);
uint32_t Type = RI.getType(Config->Mips64EL);
uintX_t Offset = getOffset(RI.r_offset);
@@ -108,7 +110,8 @@ void InputSectionBase<ELFT>::relocate(
uint8_t *BufLoc = Buf + Offset;
uintX_t AddrLoc = OutSec->getVA() + Offset;
- if (Target->isTlsLocalDynamicReloc(Type)) {
+ if (Target->isTlsLocalDynamicReloc(Type) &&
+ !Target->isTlsOptimized(Type, nullptr)) {
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
Out<ELFT>::Got->getVA() +
Out<ELFT>::LocalModuleTlsIndexOffset +
@@ -127,16 +130,20 @@ void InputSectionBase<ELFT>::relocate(
SymbolBody &Body = *File->getSymbolBody(SymIndex)->repl();
- if (Target->isTlsGlobalDynamicReloc(Type)) {
+ if (Target->isTlsGlobalDynamicReloc(Type) &&
+ !Target->isTlsOptimized(Type, &Body)) {
Target->relocateOne(BufLoc, BufEnd, Type, AddrLoc,
Out<ELFT>::Got->getEntryAddr(Body) +
getAddend<ELFT>(RI));
continue;
}
- if (Target->isTlsOptimized(Type, Body)) {
- Target->relocateTlsOptimize(BufLoc, BufEnd, AddrLoc,
- getSymVA<ELFT>(Body));
+ if (Target->isTlsOptimized(Type, &Body)) {
+ // By optimizing TLS relocations, it is sometimes needed to skip
+ // relocations that immediately follow TLS relocations. This function
+ // knows how many slots we need to skip.
+ I += Target->relocateTlsOptimize(BufLoc, BufEnd, Type, AddrLoc,
+ getSymVA<ELFT>(Body));
continue;
}
Modified: lld/trunk/ELF/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.cpp?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/Target.cpp (original)
+++ lld/trunk/ELF/Target.cpp Wed Nov 25 15:46:05 2015
@@ -80,9 +80,17 @@ public:
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
uint64_t SA) const override;
bool isRelRelative(uint32_t Type) const override;
- bool isTlsOptimized(unsigned Type, const SymbolBody &S) const override;
- void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
- uint64_t SA) const override;
+ bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
+ unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
+ uint64_t P, uint64_t SA) const override;
+
+private:
+ void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
+ void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
+ void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
};
class PPC64TargetInfo final : public TargetInfo {
@@ -161,7 +169,7 @@ TargetInfo *createTarget() {
TargetInfo::~TargetInfo() {}
-bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody &S) const {
+bool TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
return false;
}
@@ -177,8 +185,11 @@ unsigned TargetInfo::getPltRefReloc(unsi
bool TargetInfo::isRelRelative(uint32_t Type) const { return true; }
-void TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
- uint64_t SA) const {}
+unsigned TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+ uint32_t Type, uint64_t P,
+ uint64_t SA) const {
+ return 0;
+}
void TargetInfo::writeGotHeaderEntries(uint8_t *Buf) const {}
@@ -364,7 +375,7 @@ bool X86_64TargetInfo::relocNeedsCopy(ui
bool X86_64TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
if (Type == R_X86_64_GOTTPOFF)
- return !isTlsOptimized(Type, S);
+ return !isTlsOptimized(Type, &S);
return Type == R_X86_64_GOTTPOFF || Type == R_X86_64_GOTPCREL ||
relocNeedsPlt(Type, S);
}
@@ -435,10 +446,54 @@ bool X86_64TargetInfo::isRelRelative(uin
}
bool X86_64TargetInfo::isTlsOptimized(unsigned Type,
- const SymbolBody &S) const {
- if (Config->Shared || !S.isTLS())
+ const SymbolBody *S) const {
+ if (Config->Shared || (S && !S->isTLS()))
return false;
- return Type == R_X86_64_GOTTPOFF && !canBePreempted(&S, true);
+ return Type == R_X86_64_TLSLD || Type == R_X86_64_DTPOFF32 ||
+ (Type == R_X86_64_TLSGD && !canBePreempted(S, true)) ||
+ (Type == R_X86_64_GOTTPOFF && !canBePreempted(S, true));
+}
+
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
+// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how LD can be optimized to LE:
+// leaq bar@tlsld(%rip), %rdi
+// callq __tls_get_addr@PLT
+// leaq bar@dtpoff(%rax), %rcx
+// Is converted to:
+// .word 0x6666
+// .byte 0x66
+// mov %fs:0,%rax
+// leaq bar@tpoff(%rax), %rcx
+void X86_64TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd,
+ uint64_t P, uint64_t SA) const {
+ const uint8_t Inst[] = {
+ 0x66, 0x66, //.word 0x6666
+ 0x66, //.byte 0x66
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
+ };
+ memcpy(Loc - 3, Inst, sizeof(Inst));
+}
+
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.5
+// x86-x64 linker optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how GD can be optimized to LE:
+// .byte 0x66
+// leaq x@tlsgd(%rip), %rdi
+// .word 0x6666
+// rex64
+// call __tls_get_addr@plt
+// Is converted to:
+// mov %fs:0x0,%rax
+// lea x@tpoff,%rax
+void X86_64TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd,
+ uint64_t P, uint64_t SA) const {
+ const uint8_t Inst[] = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax
+ };
+ memcpy(Loc - 4, Inst, sizeof(Inst));
+ relocateOne(Loc + 8, BufEnd, R_X86_64_TPOFF32, P, SA);
}
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
@@ -446,8 +501,8 @@ bool X86_64TargetInfo::isTlsOptimized(un
// This function does that. Read "ELF Handling For Thread-Local Storage,
// 5.5 x86-x64 linker optimizations" (http://www.akkadia.org/drepper/tls.pdf)
// by Ulrich Drepper for details.
-void X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
- uint64_t P, uint64_t SA) const {
+void X86_64TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd,
+ uint64_t P, uint64_t SA) const {
// Ulrich's document section 6.5 says that @gottpoff(%rip) must be
// used in MOVQ or ADDQ instructions only.
// "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG".
@@ -476,6 +531,33 @@ void X86_64TargetInfo::relocateTlsOptimi
relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
}
+// This function applies a TLS relocation with an optimization as described
+// in the Ulrich's document. As a result of rewriting instructions at the
+// relocation target, relocations immediately follow the TLS relocation (which
+// would be applied to rewritten instructions) may have to be skipped.
+// This function returns a number of relocations that need to be skipped.
+unsigned X86_64TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+ uint32_t Type, uint64_t P,
+ uint64_t SA) const {
+ switch (Type) {
+ case R_X86_64_GOTTPOFF:
+ relocateTlsIeToLe(Loc, BufEnd, P, SA);
+ return 0;
+ case R_X86_64_TLSLD:
+ relocateTlsLdToLe(Loc, BufEnd, P, SA);
+ // The next relocation should be against __tls_get_addr, so skip it
+ return 1;
+ case R_X86_64_TLSGD:
+ relocateTlsGdToLe(Loc, BufEnd, P, SA);
+ // The next relocation should be against __tls_get_addr, so skip it
+ return 1;
+ case R_X86_64_DTPOFF32:
+ relocateOne(Loc, BufEnd, R_X86_64_TPOFF32, P, SA);
+ return 0;
+ }
+ llvm_unreachable("Unknown TLS optimization");
+}
+
void X86_64TargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t P, uint64_t SA) const {
switch (Type) {
Modified: lld/trunk/ELF/Target.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.h?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/Target.h (original)
+++ lld/trunk/ELF/Target.h Wed Nov 25 15:46:05 2015
@@ -59,9 +59,10 @@ public:
virtual bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const = 0;
virtual void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
uint64_t P, uint64_t SA) const = 0;
- virtual bool isTlsOptimized(unsigned Type, const SymbolBody &S) const;
- virtual void relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
- uint64_t SA) const;
+ virtual bool isTlsOptimized(unsigned Type, const SymbolBody *S) const;
+ virtual unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+ uint32_t Type, uint64_t P,
+ uint64_t SA) const;
virtual ~TargetInfo();
protected:
Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Wed Nov 25 15:46:05 2015
@@ -203,6 +203,8 @@ void Writer<ELFT>::scanRelocs(
uint32_t Type = RI.getType(Config->Mips64EL);
if (Target->isTlsLocalDynamicReloc(Type)) {
+ if (Target->isTlsOptimized(Type, nullptr))
+ continue;
if (Out<ELFT>::LocalModuleTlsIndexOffset == uint32_t(-1)) {
Out<ELFT>::LocalModuleTlsIndexOffset =
Out<ELFT>::Got->addLocalModuleTlsIndex();
@@ -220,6 +222,8 @@ void Writer<ELFT>::scanRelocs(
Body = Body->repl();
if (Body && Body->isTLS() && Target->isTlsGlobalDynamicReloc(Type)) {
+ if (Target->isTlsOptimized(Type, Body))
+ continue;
if (Body->isInGot())
continue;
Out<ELFT>::Got->addDynTlsEntry(Body);
Modified: lld/trunk/test/ELF/tls-opt.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt.s?rev=254101&r1=254100&r2=254101&view=diff
==============================================================================
--- lld/trunk/test/ELF/tls-opt.s (original)
+++ lld/trunk/test/ELF/tls-opt.s Wed Nov 25 15:46:05 2015
@@ -20,12 +20,21 @@
// DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15
// DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp
// DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12
-
// Corrupted output:
// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
// DISASM-NEXT: 1105b: 48 d1 81 c4 f8 ff ff rolq -1852(%rcx)
// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax)
// DISASM-NEXT: 11065: 81 c4 f8 ff ff ff addl $4294967288, %esp
+// LD to LE:
+// DISASM-NEXT: 1106b: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 11077: 48 8d 88 f8 ff ff ff leaq -8(%rax), %rcx
+// DISASM-NEXT: 1107e: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 1108a: 48 8d 88 fc ff ff ff leaq -4(%rax), %rcx
+// GD to LE:
+// DISASM-NEXT: 11091: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 1109a: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax
+// DISASM-NEXT: 110a1: 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax
+// DISASM-NEXT: 110aa: 48 8d 80 fc ff ff ff leaq -4(%rax), %rax
.type tls0,@object
.section .tbss,"awT",@nobits
@@ -62,3 +71,23 @@ _start:
xchgq tls0@gottpoff(%rip),%rax
shlq tls0@gottpoff
rolq tls0@gottpoff
+
+ //LD to LE:
+ leaq tls0@tlsld(%rip), %rdi
+ callq __tls_get_addr@PLT
+ leaq tls0@dtpoff(%rax),%rcx
+ leaq tls1@tlsld(%rip), %rdi
+ callq __tls_get_addr@PLT
+ leaq tls1@dtpoff(%rax),%rcx
+
+ //GD to LE:
+ .byte 0x66
+ leaq tls0@tlsgd(%rip),%rdi
+ .word 0x6666
+ rex64
+ call __tls_get_addr@plt
+ .byte 0x66
+ leaq tls1@tlsgd(%rip),%rdi
+ .word 0x6666
+ rex64
+ call __tls_get_addr@plt
More information about the llvm-commits
mailing list