[clang] [llvm] [X86] Add Support for X86 TLSDESC Relocations (PR #83136)
Fangrui Song via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 15 00:16:27 PDT 2024
================
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+ at x = thread_local global i32 0, align 4
+ at y = internal thread_local global i32 0, align 4
+ at z = internal thread_local global i32 1, align 4
+ at t = external hidden thread_local global i32, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll .L0$pb
+; X86-NEXT: .L0$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp0:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: leal x at tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x at tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X32-LABEL: f1:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: #APP
+; X32-NEXT: #NO_APP
+; X32-NEXT: leal x at tlsdesc(%rip), %eax
+; X32-NEXT: callq *x at tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: #APP
+; X32-NEXT: #NO_APP
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f1:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: leaq x at tlsdesc(%rip), %rax
+; X64-NEXT: callq *x at tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"()
+ %b = call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %a.0 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 0
+ %a.1 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 1
+ %a.2 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 2
+ %a.3 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 3
+ %a.4 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 4
+ %a.5 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 5
+ call void asm sideeffect "", "r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %a.0, i32 %a.1, i32 %a.2, i32 %a.3, i32 %a.4, i32 %a.5)
+ ret ptr %b
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L1$pb
+; X86-NEXT: .L1$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp1:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT: movl %gs:0, %ecx
+; X86-NEXT: leal x at tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x at tlscall(%eax)
+; X86-NEXT: movl (%eax,%ecx), %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f2:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: movl %fs:0, %ecx
+; X32-NEXT: leal x at tlsdesc(%rip), %eax
+; X32-NEXT: callq *x at tlscall(%eax)
+; X32-NEXT: movl (%eax,%ecx), %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f2:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movq %fs:0, %rcx
+; X64-NEXT: leaq x at tlsdesc(%rip), %rax
+; X64-NEXT: callq *x at tlscall(%rax)
+; X64-NEXT: movl (%rax,%rcx), %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L2$pb
+; X86-NEXT: .L2$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp2:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT: leal x at tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x at tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f3:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x at tlsdesc(%rip), %eax
+; X32-NEXT: callq *x at tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f3:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x at tlsdesc(%rip), %rax
+; X64-NEXT: callq *x at tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L3$pb
+; X86-NEXT: .L3$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp3:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT: movl %gs:0, %edx
+; X86-NEXT: leal _TLS_MODULE_BASE_ at tlsdesc(%ebx), %eax
+; X86-NEXT: calll *_TLS_MODULE_BASE_ at tlscall(%eax)
+; X86-NEXT: movl y at DTPOFF(%eax,%edx), %ecx
+; X86-NEXT: addl z at DTPOFF(%eax,%edx), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f4:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: movl %fs:0, %edx
+; X32-NEXT: leal _TLS_MODULE_BASE_ at tlsdesc(%rip), %eax
+; X32-NEXT: callq *_TLS_MODULE_BASE_ at tlscall(%eax)
+; X32-NEXT: movl y at DTPOFF(%eax,%edx), %ecx
+; X32-NEXT: addl z at DTPOFF(%eax,%edx), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f4:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movq %fs:0, %rdx
+; X64-NEXT: leaq _TLS_MODULE_BASE_ at tlsdesc(%rip), %rax
+; X64-NEXT: callq *_TLS_MODULE_BASE_ at tlscall(%rax)
+; X64-NEXT: movl y at DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT: addl z at DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = load i32, ptr @y, align 4
+ %2 = load i32, ptr @z, align 4
+ %3 = add nsw i32 %1, %2
+ ret i32 %3
+}
+
+define i32 @f5() nounwind {
----------------
MaskRay wrote:
It seems that f4 and f5 can be combined? Both y and z are of internal linkage, so there isn't any nuance. You could change one to protected visibility
https://github.com/llvm/llvm-project/pull/83136
More information about the cfe-commits
mailing list