[lld] r255103 - [ELF] - Implement the TLS relocation optimization for 32-bit x86.
George Rimar via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 9 01:55:54 PST 2015
Author: grimar
Date: Wed Dec 9 03:55:54 2015
New Revision: 255103
URL: http://llvm.org/viewvc/llvm-project?rev=255103&view=rev
Log:
[ELF] - Implement the TLS relocation optimization for 32-bit x86.
Implement the TLS relocation optimization for 32-bit x86 that is described in
"ELF Handling For Thread-Local Storage" by Ulrich Drepper, chapter 5,
"IA-32 Linker Optimizations". Specifically, this patch implements these
optimizations: LD->LE, GD->IE, GD->LD, and IE->LE.
Differential revision: http://reviews.llvm.org/D15292
Added:
lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s
lld/trunk/test/ELF/tls-opt-gdiele-i686.s
lld/trunk/test/ELF/tls-opt-i686.s
Modified:
lld/trunk/ELF/Target.cpp
Modified: lld/trunk/ELF/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Target.cpp?rev=255103&r1=255102&r2=255103&view=diff
==============================================================================
--- lld/trunk/ELF/Target.cpp (original)
+++ lld/trunk/ELF/Target.cpp Wed Dec 9 03:55:54 2015
@@ -88,6 +88,20 @@ public:
bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override;
void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P,
uint64_t SA, uint8_t *PairedLoc = nullptr) const override;
+ bool isTlsOptimized(unsigned Type, const SymbolBody *S) const override;
+ unsigned relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type,
+ uint64_t P, uint64_t SA,
+ const SymbolBody &S) const override;
+
+private:
+ void relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
+ void relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
+ void relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
+ void relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const;
};
class X86_64TargetInfo final : public TargetInfo {
@@ -260,7 +274,7 @@ bool X86TargetInfo::isTlsDynReloc(unsign
if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 ||
Type == R_386_TLS_GOTIE)
return Config->Shared;
- return false;
+ return Type == R_386_TLS_GD;
}
void X86TargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr,
@@ -311,8 +325,11 @@ bool X86TargetInfo::relocNeedsCopy(uint3
}
bool X86TargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const {
- return Type == R_386_TLS_GOTIE || Type == R_386_GOT32 ||
- relocNeedsPlt(Type, S);
+ if (S.isTLS() && Type == R_386_TLS_GD)
+ return Target->isTlsOptimized(Type, &S) && canBePreempted(&S, true);
+ if (Type == R_386_TLS_GOTIE)
+ return !isTlsOptimized(Type, &S);
+ return Type == R_386_GOT32 || relocNeedsPlt(Type, S);
}
bool X86TargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const {
@@ -358,6 +375,121 @@ void X86TargetInfo::relocateOne(uint8_t
}
}
+bool X86TargetInfo::isTlsOptimized(unsigned Type, const SymbolBody *S) const {
+ if (Config->Shared || (S && !S->isTLS()))
+ return false;
+ return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM ||
+ Type == R_386_TLS_GD ||
+ (Type == R_386_TLS_GOTIE && !canBePreempted(S, true));
+}
+
+unsigned X86TargetInfo::relocateTlsOptimize(uint8_t *Loc, uint8_t *BufEnd,
+ uint32_t Type, uint64_t P,
+ uint64_t SA,
+ const SymbolBody &S) const {
+ switch (Type) {
+ case R_386_TLS_GD:
+ if (canBePreempted(&S, true))
+ relocateTlsGdToIe(Loc, BufEnd, P, SA);
+ else
+ relocateTlsGdToLe(Loc, BufEnd, P, SA);
+ // The next relocation should be against __tls_get_addr, so skip it
+ return 1;
+ case R_386_TLS_GOTIE:
+ relocateTlsIeToLe(Loc, BufEnd, P, SA);
+ return 0;
+ case R_386_TLS_LDM:
+ relocateTlsLdToLe(Loc, BufEnd, P, SA);
+ // The next relocation should be against __tls_get_addr, so skip it
+ return 1;
+ case R_386_TLS_LDO_32:
+ relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
+ return 0;
+ }
+ llvm_unreachable("Unknown TLS optimization");
+}
+
+// "Ulrich Drepper, ELF Handling For Thread-Local Storage" (5.1
+// IA-32 Linker Optimizations, http://www.akkadia.org/drepper/tls.pdf) shows
+// how GD can be optimized to IE:
+// leal x@tlsgd(, %ebx, 1), %eax
+// call __tls_get_addr@plt
+// Is converted to:
+// movl %gs:0, %eax
+// addl x@gotntpoff(%ebx), %eax
+void X86TargetInfo::relocateTlsGdToIe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const {
+ const uint8_t Inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+ 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax
+ };
+ memcpy(Loc - 3, Inst, sizeof(Inst));
+ relocateOne(Loc + 5, BufEnd, R_386_32, P,
+ SA - Out<ELF32LE>::Got->getVA() -
+ Out<ELF32LE>::Got->getNumEntries() * 4);
+}
+
+// GD can be optimized to LE:
+// leal x@tlsgd(, %ebx, 1), %eax
+// call __tls_get_addr@plt
+// Can be converted to:
+// movl %gs:0,%eax
+// addl $x@ntpoff,%eax
+// But gold emits subl $x@tpoff,%eax instead of addl.
+// The two forms are equivalent because x@tpoff is the negation of x@ntpoff.
+// This method generates subl to be consistent with gold.
+void X86TargetInfo::relocateTlsGdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const {
+ const uint8_t Inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+ 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl $0, %eax
+ };
+ memcpy(Loc - 3, Inst, sizeof(Inst));
+ relocateOne(Loc + 5, BufEnd, R_386_32, P,
+ Out<ELF32LE>::TlsPhdr->p_memsz - SA);
+}
+
+// LD can be optimized to LE:
+// leal foo(%reg),%eax
+// call ___tls_get_addr
+// Is converted to:
+// movl %gs:0,%eax
+// nop
+// leal 0(%esi,1),%esi
+void X86TargetInfo::relocateTlsLdToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const {
+ const uint8_t Inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
+ 0x90, // nop
+ 0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi
+ };
+ memcpy(Loc - 2, Inst, sizeof(Inst));
+}
+
+// In some conditions, R_386_TLS_GOTIE relocation can be optimized to
+// R_386_TLS_LE so that it does not use GOT.
+// This function does that. Read "ELF Handling For Thread-Local Storage,
+// 5.1 IA-32 Linker Optimizations" (http://www.akkadia.org/drepper/tls.pdf)
+// by Ulrich Drepper for details.
+void X86TargetInfo::relocateTlsIeToLe(uint8_t *Loc, uint8_t *BufEnd, uint64_t P,
+ uint64_t SA) const {
+ // Ulrich's document section 6.2 says that @gotntpoff can be
+ // used with MOVL or ADDL instructions.
+ // "MOVL foo@GOTNTPOFF(%EBX), %REG" is transformed to "MOVL $foo, %REG".
+ // "ADDL foo@GOTNTPOFF(%EBX), %REG" is transformed to "LEAL foo(%REG), %REG".
+ // Note: gold converts to ADDL instead of LEAL.
+ uint8_t *Inst = Loc - 2;
+ uint8_t *RegSlot = Loc - 1;
+ uint8_t Reg = (Loc[-1] >> 3) & 7;
+ bool IsMov = *Inst == 0x8b;
+ *Inst = IsMov ? 0xc7 : 0x8d;
+ if (IsMov)
+ *RegSlot = 0xc0 | ((*RegSlot >> 3) & 7);
+ else
+ *RegSlot = 0x80 | Reg | (Reg << 3);
+ relocateOne(Loc, BufEnd, R_386_TLS_LE, P, SA);
+}
+
X86_64TargetInfo::X86_64TargetInfo() {
CopyReloc = R_X86_64_COPY;
PCRelReloc = R_X86_64_PC32;
Added: lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s?rev=255103&view=auto
==============================================================================
--- lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s (added)
+++ lld/trunk/test/ELF/Inputs/tls-opt-gdiele-i686.s Wed Dec 9 03:55:54 2015
@@ -0,0 +1,20 @@
+.type tlsshared0, at object
+.section .tbss,"awT", at nobits
+.globl tlsshared0
+.align 4
+tlsshared0:
+ .long 0
+ .size tlsshared0, 4
+
+.type tlsshared1, at object
+.globl tlsshared1
+.align 4
+tlsshared1:
+ .long 0
+ .size tlsshared1, 4
+
+.text
+ .globl __tls_get_addr
+ .align 16, 0x90
+ .type __tls_get_addr, at function
+__tls_get_addr:
Added: lld/trunk/test/ELF/tls-opt-gdiele-i686.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt-gdiele-i686.s?rev=255103&view=auto
==============================================================================
--- lld/trunk/test/ELF/tls-opt-gdiele-i686.s (added)
+++ lld/trunk/test/ELF/tls-opt-gdiele-i686.s Wed Dec 9 03:55:54 2015
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %p/Inputs/tls-opt-gdiele-i686.s -o %tso.o
+// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
+// RUN: ld.lld -shared %tso.o -o %tso
+// RUN: ld.lld %t.o %tso -o %tout
+// RUN: llvm-readobj -r %tout | FileCheck --check-prefix=NORELOC %s
+// RUN: llvm-objdump -d %tout | FileCheck --check-prefix=DISASM %s
+
+// NORELOC: Relocations [
+// NORELOC-NEXT: Section ({{.*}}) .rel.dyn {
+// NORELOC-NEXT: 0x12050 R_386_TLS_TPOFF tlsshared0 0x0
+// NORELOC-NEXT: 0x12054 R_386_TLS_TPOFF tlsshared1 0x0
+// NORELOC-NEXT: }
+// NORELOC-NEXT: ]
+
+// DISASM: Disassembly of section .text:
+// DISASM-NEXT: _start:
+// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11006: 03 83 f8 ff ff ff addl -8(%ebx), %eax
+// DISASM-NEXT: 1100c: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11012: 03 83 fc ff ff ff addl -4(%ebx), %eax
+// DISASM-NEXT: 11018: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 1101e: 81 e8 08 00 00 00 subl $8, %eax
+// DISASM-NEXT: 11024: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 1102a: 81 e8 04 00 00 00 subl $4, %eax
+
+.type tlsexe1, at object
+.section .tbss,"awT", at nobits
+.globl tlsexe1
+.align 4
+tlsexe1:
+ .long 0
+ .size tlsexe1, 4
+
+.type tlsexe2, at object
+.section .tbss,"awT", at nobits
+.globl tlsexe2
+.align 4
+tlsexe2:
+ .long 0
+ .size tlsexe2, 4
+
+.section .text
+.globl ___tls_get_addr
+.type ___tls_get_addr, at function
+___tls_get_addr:
+
+.section .text
+.globl _start
+_start:
+//GD->IE
+leal tlsshared0 at tlsgd(,%ebx,1),%eax
+call ___tls_get_addr at plt
+leal tlsshared1 at tlsgd(,%ebx,1),%eax
+call ___tls_get_addr at plt
+//GD->LE
+leal tlsexe1 at tlsgd(,%ebx,1),%eax
+call ___tls_get_addr at plt
+leal tlsexe2 at tlsgd(,%ebx,1),%eax
+call ___tls_get_addr at plt
Added: lld/trunk/test/ELF/tls-opt-i686.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tls-opt-i686.s?rev=255103&view=auto
==============================================================================
--- lld/trunk/test/ELF/tls-opt-i686.s (added)
+++ lld/trunk/test/ELF/tls-opt-i686.s Wed Dec 9 03:55:54 2015
@@ -0,0 +1,69 @@
+// RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %s -o %t.o
+// RUN: ld.lld %t.o -o %t1
+// RUN: llvm-readobj -r %t1 | FileCheck --check-prefix=NORELOC %s
+// RUN: llvm-objdump -d %t1 | FileCheck --check-prefix=DISASM %s
+
+// NORELOC: Relocations [
+// NORELOC-NEXT: ]
+
+// DISASM: Disassembly of section .text:
+// DISASM-NEXT: _start:
+// LD -> LE:
+// DISASM-NEXT: 11000: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11006: 90 nop
+// DISASM-NEXT: 11007: 8d 74 26 00 leal (%esi), %esi
+// DISASM-NEXT: 1100b: 8d 90 f8 ff ff ff leal -8(%eax), %edx
+// DISASM-NEXT: 11011: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11017: 90 nop
+// DISASM-NEXT: 11018: 8d 74 26 00 leal (%esi), %esi
+// DISASM-NEXT: 1101c: 8d 90 fc ff ff ff leal -4(%eax), %edx
+// IE -> LE:
+// 4294967288 == 0xFFFFFFF8
+// 4294967292 == 0xFFFFFFFC
+// DISASM-NEXT: 11022: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11028: c7 c0 f8 ff ff ff movl $4294967288, %eax
+// DISASM-NEXT: 1102e: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11034: c7 c0 fc ff ff ff movl $4294967292, %eax
+// DISASM-NEXT: 1103a: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 11040: 8d 80 f8 ff ff ff leal -8(%eax), %eax
+// DISASM-NEXT: 11046: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DISASM-NEXT: 1104c: 8d 80 fc ff ff ff leal -4(%eax), %eax
+.type tls0, at object
+.section .tbss,"awT", at nobits
+.globl tls0
+.align 4
+tls0:
+ .long 0
+ .size tls0, 4
+
+.type tls1, at object
+.globl tls1
+.align 4
+tls1:
+ .long 0
+ .size tls1, 4
+
+.section .text
+.globl ___tls_get_addr
+.type ___tls_get_addr, at function
+___tls_get_addr:
+
+.section .text
+.globl _start
+_start:
+//LD -> LE:
+leal tls0 at tlsldm(%ebx),%eax
+call ___tls_get_addr at plt
+leal tls0 at dtpoff(%eax),%edx
+leal tls1 at tlsldm(%ebx),%eax
+call ___tls_get_addr at plt
+leal tls1 at dtpoff(%eax),%edx
+//IE -> LE:
+movl %gs:0,%eax
+movl tls0 at gotntpoff(%ebx),%eax
+movl %gs:0,%eax
+movl tls1 at gotntpoff(%ebx),%eax
+movl %gs:0,%eax
+addl tls0 at gotntpoff(%ebx),%eax
+movl %gs:0,%eax
+addl tls1 at gotntpoff(%ebx),%eax
More information about the llvm-commits
mailing list