[llvm] r263855 - [CXX_FAST_TLS] fix issues with O0 on ARM, AArch64 and X86.

Manman Ren via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 18 16:38:49 PDT 2016


Author: mren
Date: Fri Mar 18 18:38:49 2016
New Revision: 263855

URL: http://llvm.org/viewvc/llvm-project?rev=263855&view=rev
Log:
[CXX_FAST_TLS] fix issues with O0 on ARM, AArch64 and X86.

Since at O0, explicit copies via SplitCSR may not be removed even if
they are unnecessary, we choose not to use SplitCSR at O0.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
    llvm/trunk/test/CodeGen/AArch64/cxx-tlscc.ll
    llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll
    llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=263855&r1=263854&r2=263855&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri Mar 18 18:38:49 2016
@@ -473,7 +473,7 @@ bool SelectionDAGISel::runOnMachineFunct
 
   // We split CSR if the target supports it for the given function
   // and the function has only return exits.
-  if (TLI->supportSplitCSR(MF)) {
+  if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
     FuncInfo->SplitCSR = true;
 
     // Collect all the return blocks.

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=263855&r1=263854&r2=263855&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri Mar 18 18:38:49 2016
@@ -2469,6 +2469,7 @@ CCAssignFn *AArch64TargetLowering::CCAss
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
+  case CallingConv::CXX_FAST_TLS:
     if (!Subtarget->isTargetDarwin())
       return CC_AArch64_AAPCS;
     return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;

Modified: llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFastISel.cpp?rev=263855&r1=263854&r2=263855&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFastISel.cpp Fri Mar 18 18:38:49 2016
@@ -1847,6 +1847,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCa
     }
     // Fallthrough
   case CallingConv::C:
+  case CallingConv::CXX_FAST_TLS:
     // Use target triple & subtarget features to do actual dispatch.
     if (Subtarget->isAAPCS_ABI()) {
       if (Subtarget->hasVFP2() &&

Modified: llvm/trunk/test/CodeGen/AArch64/cxx-tlscc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/cxx-tlscc.ll?rev=263855&r1=263854&r2=263855&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/cxx-tlscc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/cxx-tlscc.ll Fri Mar 18 18:38:49 2016
@@ -3,6 +3,8 @@
 ; Shrink wrapping currently does not kick in because we have a TLS CALL
 ; in the entry block and it will clobber the link register.
 
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s
+
 %struct.S = type { i8 }
 
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
@@ -76,6 +78,52 @@ __tls_init.exit:
 ; CHECK-NOT: ldp d29, d28
 ; CHECK-NOT: ldp d31, d30
 
+; CHECK-O0-LABEL: _ZTW2sg
+; CHECK-O0: stp d31, d30
+; CHECK-O0: stp d29, d28
+; CHECK-O0: stp d27, d26
+; CHECK-O0: stp d25, d24
+; CHECK-O0: stp d23, d22
+; CHECK-O0: stp d21, d20
+; CHECK-O0: stp d19, d18
+; CHECK-O0: stp d17, d16
+; CHECK-O0: stp d7, d6
+; CHECK-O0: stp d5, d4
+; CHECK-O0: stp d3, d2
+; CHECK-O0: stp d1, d0
+; CHECK-O0: stp x14, x13
+; CHECK-O0: stp x12, x11
+; CHECK-O0: stp x10, x9
+; CHECK-O0: stp x8, x7
+; CHECK-O0: stp x6, x5
+; CHECK-O0: stp x4, x3
+; CHECK-O0: stp x2, x1
+; CHECK-O0: blr
+; CHECK-O0: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
+; CHECK-O0: blr
+; CHECK-O0: tlv_atexit
+; CHECK-O0: [[BB_end]]:
+; CHECK-O0: blr
+; CHECK-O0: ldp x2, x1
+; CHECK-O0: ldp x4, x3
+; CHECK-O0: ldp x6, x5
+; CHECK-O0: ldp x8, x7
+; CHECK-O0: ldp x10, x9
+; CHECK-O0: ldp x12, x11
+; CHECK-O0: ldp x14, x13
+; CHECK-O0: ldp d1, d0
+; CHECK-O0: ldp d3, d2
+; CHECK-O0: ldp d5, d4
+; CHECK-O0: ldp d7, d6
+; CHECK-O0: ldp d17, d16
+; CHECK-O0: ldp d19, d18
+; CHECK-O0: ldp d21, d20
+; CHECK-O0: ldp d23, d22
+; CHECK-O0: ldp d25, d24
+; CHECK-O0: ldp d27, d26
+; CHECK-O0: ldp d29, d28
+; CHECK-O0: ldp d31, d30
+
 ; CHECK-LABEL: _ZTW4sum1
 ; CHECK-NOT: stp d31, d30
 ; CHECK-NOT: stp d29, d28
@@ -98,6 +146,64 @@ __tls_init.exit:
 ; CHECK-NOT: stp x4, x3
 ; CHECK-NOT: stp x2, x1
 ; CHECK: blr
+
+; CHECK-O0-LABEL: _ZTW4sum1
+; CHECK-O0-NOT: vstr
+; CHECK-O0-NOT: vldr
 define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
   ret i32* @sum1
 }
+
+; Make sure at O0, we don't generate spilling/reloading of the CSRs.
+; CHECK-O0-LABEL: tls_test2
+; CHECK-O0-NOT: stp d31, d30
+; CHECK-O0-NOT: stp d29, d28
+; CHECK-O0-NOT: stp d27, d26
+; CHECK-O0-NOT: stp d25, d24
+; CHECK-O0-NOT: stp d23, d22
+; CHECK-O0-NOT: stp d21, d20
+; CHECK-O0-NOT: stp d19, d18
+; CHECK-O0-NOT: stp d17, d16
+; CHECK-O0-NOT: stp d7, d6
+; CHECK-O0-NOT: stp d5, d4
+; CHECK-O0-NOT: stp d3, d2
+; CHECK-O0-NOT: stp d1, d0
+; CHECK-O0-NOT: stp x20, x19
+; CHECK-O0-NOT: stp x14, x13
+; CHECK-O0-NOT: stp x12, x11
+; CHECK-O0-NOT: stp x10, x9
+; CHECK-O0-NOT: stp x8, x7
+; CHECK-O0-NOT: stp x6, x5
+; CHECK-O0-NOT: stp x4, x3
+; CHECK-O0-NOT: stp x2, x1
+; CHECK-O0: bl {{.*}}tls_helper
+; CHECK-O0-NOT: ldp x2, x1
+; CHECK-O0-NOT: ldp x4, x3
+; CHECK-O0-NOT: ldp x6, x5
+; CHECK-O0-NOT: ldp x8, x7
+; CHECK-O0-NOT: ldp x10, x9
+; CHECK-O0-NOT: ldp x12, x11
+; CHECK-O0-NOT: ldp x14, x13
+; CHECK-O0-NOT: ldp x20, x19
+; CHECK-O0-NOT: ldp d1, d0
+; CHECK-O0-NOT: ldp d3, d2
+; CHECK-O0-NOT: ldp d5, d4
+; CHECK-O0-NOT: ldp d7, d6
+; CHECK-O0-NOT: ldp d17, d16
+; CHECK-O0-NOT: ldp d19, d18
+; CHECK-O0-NOT: ldp d21, d20
+; CHECK-O0-NOT: ldp d23, d22
+; CHECK-O0-NOT: ldp d25, d24
+; CHECK-O0-NOT: ldp d27, d26
+; CHECK-O0-NOT: ldp d29, d28
+; CHECK-O0-NOT: ldp d31, d30
+; CHECK-O0: ret
+%class.C = type { i32 }
+@tC = internal thread_local global %class.C zeroinitializer, align 4
+declare cxx_fast_tlscc void @tls_helper()
+define cxx_fast_tlscc %class.C* @tls_test2() #1 {
+  call cxx_fast_tlscc void @tls_helper()
+  ret %class.C* @tC
+}
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind }

Modified: llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll?rev=263855&r1=263854&r2=263855&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll Fri Mar 18 18:38:49 2016
@@ -3,6 +3,9 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios8.0 | FileCheck %s
 ; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s
 
+; RUN: llc < %s -mtriple=armv7k-apple-watchos2.0 -O0 | FileCheck --check-prefix=CHECK-O0 --check-prefix=WATCH-O0 %s
+; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -O0 | FileCheck --check-prefix=CHECK-O0 --check-prefix=IOS-O0 %s
+
 %struct.S = type { i8 }
 
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
@@ -10,6 +13,9 @@
 @__tls_guard = internal thread_local unnamed_addr global i1 false
 @sum1 = internal thread_local global i32 0, align 4
 
+%class.C = type { i32 }
+@tC = internal thread_local global %class.C zeroinitializer, align 4
+
 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
 declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
 declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
@@ -36,7 +42,7 @@ __tls_init.exit:
 ; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
 ; CHECK: blx
 ; CHECK: bne [[BB_end:.?LBB0_[0-9]+]]
-; CHECK; blx
+; CHECK: blx
 ; CHECK: tlv_atexit
 ; CHECK: [[BB_end]]:
 ; CHECK: blx
@@ -46,12 +52,54 @@ __tls_init.exit:
 ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
 ; CHECK: pop {lr}
 
+; CHECK-O0-LABEL: _ZTW2sg
+; WATCH-O0: push {r1, r2, r3, r6, r7, lr}
+; IOS-O0: push {r1, r2, r3, r7, lr}
+; CHECK-O0: push {r9, r12}
+; CHECK-O0: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-O0: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK-O0: blx
+; CHECK-O0: bne [[BB_end:.?LBB0_[0-9]+]]
+; CHECK-O0: blx
+; CHECK-O0: tlv_atexit
+; CHECK-O0: [[BB_end]]:
+; CHECK-O0: blx
+; CHECK-O0: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK-O0: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-O0: pop {r9, r12}
+; WATCH-O0: pop {r1, r2, r3, r6, r7, pc}
+; IOS-O0: pop {r1, r2, r3, r7, pc}
+
 ; CHECK-LABEL: _ZTW4sum1
 ; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
 ; CHECK-NOT: push {r9, r12}
 ; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
 ; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
 ; CHECK: blx
+
+; CHECK-O0-LABEL: _ZTW4sum1
+; CHECK-O0-NOT: vpush
+; CHECK-O0-NOT: vstr
+; CHECK-O0-NOT: vpop
+; CHECK-O0-NOT: vldr
+; CHECK-O0: pop
 define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
   ret i32* @sum1
 }
+
+; Make sure at O0, we don't generate spilling/reloading of the CSRs.
+; CHECK-O0-LABEL: tls_test2
+; CHECK-O0: push
+; CHECK-O0-NOT: vpush
+; CHECK-O0-NOT: vstr
+; CHECK-O0: tls_helper
+; CHECK-O0-NOT: vpop
+; CHECK-O0-NOT: vldr
+; CHECK-O0: pop
+declare cxx_fast_tlscc void @tls_helper()
+define cxx_fast_tlscc %class.C* @tls_test2() #1 {
+  call cxx_fast_tlscc void @tls_helper()
+  ret %class.C* @tC
+}
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind }

Modified: llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll?rev=263855&r1=263854&r2=263855&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll Fri Mar 18 18:38:49 2016
@@ -39,6 +39,27 @@ declare i32 @_tlv_atexit(void (i8*)*, i8
 ; CHECK-NOT: popq %r9
 ; CHECK-NOT: popq %r10
 ; CHECK-NOT: popq %r11
+
+; CHECK-O0-LABEL: _ZTW2sg
+; CHECK-O0: pushq %r11
+; CHECK-O0: pushq %r10
+; CHECK-O0: pushq %r9
+; CHECK-O0: pushq %r8
+; CHECK-O0: pushq %rsi
+; CHECK-O0: pushq %rdx
+; CHECK-O0: pushq %rcx
+; CHECK-O0: callq
+; CHECK-O0: jne
+; CHECK-O0: callq
+; CHECK-O0: tlv_atexit
+; CHECK-O0: callq
+; CHECK-O0: popq %rcx
+; CHECK-O0: popq %rdx
+; CHECK-O0: popq %rsi
+; CHECK-O0: popq %r8
+; CHECK-O0: popq %r9
+; CHECK-O0: popq %r10
+; CHECK-O0: popq %r11
 define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
   %.b.i = load i1, i1* @__tls_guard, align 1
   br i1 %.b.i, label %__tls_init.exit, label %init.i
@@ -63,6 +84,24 @@ __tls_init.exit:
 ; CHECK-NOT: pushq %rcx
 ; CHECK-NOT: pushq %rbx
 ; CHECK: callq
+; CHECK-O0-LABEL: _ZTW4sum1
+; CHECK-O0-NOT: pushq %r11
+; CHECK-O0-NOT: pushq %r10
+; CHECK-O0-NOT: pushq %r9
+; CHECK-O0-NOT: pushq %r8
+; CHECK-O0-NOT: pushq %rsi
+; CHECK-O0-NOT: pushq %rdx
+; CHECK-O0-NOT: pushq %rcx
+; CHECK-O0-NOT: pushq %rbx
+; CHECK-O0-NOT: movq %r11
+; CHECK-O0-NOT: movq %r10
+; CHECK-O0-NOT: movq %r9
+; CHECK-O0-NOT: movq %r8
+; CHECK-O0-NOT: movq %rsi
+; CHECK-O0-NOT: movq %rdx
+; CHECK-O0-NOT: movq %rcx
+; CHECK-O0-NOT: movq %rbx
+; CHECK-O0: callq
 define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
   ret i32* @sum1
 }
@@ -76,4 +115,28 @@ define cxx_fast_tlscc i32* @_ZTW4sum2()
   ret i32* @sum1
 }
 
+; Make sure at O0, we don't generate spilling/reloading of the CSRs.
+; CHECK-O0-LABEL: tls_test2
+; CHECK-O0-NOT: pushq %r11
+; CHECK-O0-NOT: pushq %r10
+; CHECK-O0-NOT: pushq %r9
+; CHECK-O0-NOT: pushq %r8
+; CHECK-O0-NOT: pushq %rsi
+; CHECK-O0-NOT: pushq %rdx
+; CHECK-O0: callq {{.*}}tls_helper
+; CHECK-O0-NOT: popq %rdx
+; CHECK-O0-NOT: popq %rsi
+; CHECK-O0-NOT: popq %r8
+; CHECK-O0-NOT: popq %r9
+; CHECK-O0-NOT: popq %r10
+; CHECK-O0-NOT: popq %r11
+; CHECK-O0: ret
+%class.C = type { i32 }
+@tC = internal thread_local global %class.C zeroinitializer, align 4
+declare cxx_fast_tlscc void @tls_helper()
+define cxx_fast_tlscc %class.C* @tls_test2() #1 {
+  call cxx_fast_tlscc void @tls_helper()
+  ret %class.C* @tC
+}
 attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind }




More information about the llvm-commits mailing list