[clang] [llvm] [X86] Add Support for X86 TLSDESC Relocations (PR #83136)

Fri Mar 15 00:16:27 PDT 2024

================
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+ at x = thread_local global i32 0, align 4
+ at y = internal thread_local global i32 0, align 4
+ at z = internal thread_local global i32 1, align 4
+ at t = external hidden thread_local global i32, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll .L0$pb
+; X86-NEXT:  .L0$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp0:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X32-LABEL: f1:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    #APP
+; X32-NEXT:    #NO_APP
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    # kill: def $eax killed $eax def $rax
+; X32-NEXT:    addl %fs:0, %eax
+; X32-NEXT:    #APP
+; X32-NEXT:    #NO_APP
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    addq %fs:0, %rax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"()
+  %b = call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %a.0 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 0
+  %a.1 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 1
+  %a.2 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 2
+  %a.3 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 3
+  %a.4 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 4
+  %a.5 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 5
+  call void asm sideeffect "", "r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %a.0, i32 %a.1, i32 %a.2, i32 %a.3, i32 %a.4, i32 %a.5)
+  ret ptr %b
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L1$pb
+; X86-NEXT:  .L1$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp1:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT:    movl %gs:0, %ecx
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    movl (%eax,%ecx), %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f2:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    movl %fs:0, %ecx
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    movl (%eax,%ecx), %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %fs:0, %rcx
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    movl (%rax,%rcx), %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %2 = load i32, ptr %1
+  ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L2$pb
+; X86-NEXT:  .L2$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp2:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f3:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    # kill: def $eax killed $eax def $rax
+; X32-NEXT:    addl %fs:0, %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    addq %fs:0, %rax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L3$pb
+; X86-NEXT:  .L3$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp3:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT:    movl %gs:0, %edx
+; X86-NEXT:    leal _TLS_MODULE_BASE_ at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *_TLS_MODULE_BASE_ at tlscall(%eax)
+; X86-NEXT:    movl y at DTPOFF(%eax,%edx), %ecx
+; X86-NEXT:    addl z at DTPOFF(%eax,%edx), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f4:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    movl %fs:0, %edx
+; X32-NEXT:    leal _TLS_MODULE_BASE_ at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *_TLS_MODULE_BASE_ at tlscall(%eax)
+; X32-NEXT:    movl y at DTPOFF(%eax,%edx), %ecx
+; X32-NEXT:    addl z at DTPOFF(%eax,%edx), %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f4:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %fs:0, %rdx
+; X64-NEXT:    leaq _TLS_MODULE_BASE_ at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *_TLS_MODULE_BASE_ at tlscall(%rax)
+; X64-NEXT:    movl y at DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT:    addl z at DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = load i32, ptr @y, align 4
+  %2 = load i32, ptr @z, align 4
+  %3 = add nsw i32 %1, %2
+  ret i32 %3
+}
+
+define i32 @f5() nounwind {
----------------
MaskRay wrote:

It seems that f4 and f5 can be combined?  Both y and z are of internal linkage, so there isn't any nuance. You could change one to protected visibility 

https://github.com/llvm/llvm-project/pull/83136