<div dir="ltr"><br><div>Thanks Hal, I will take a look.</div><div><br></div><div>Manman</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jan 14, 2016 at 3:28 PM, Hal Finkel <span dir="ltr">&lt;<a href="mailto:hfinkel@anl.gov" target="_blank">hfinkel@anl.gov</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi Manman,<br>
<br>
It seems this caused <a href="https://llvm.org/bugs/show_bug.cgi?id=26136" rel="noreferrer" target="_blank">https://llvm.org/bugs/show_bug.cgi?id=26136</a><br>
<br>
 -Hal<br>
<div class="HOEnZb"><div class="h5"><br>
----- Original Message -----<br>
> From: "Manman Ren via llvm-commits" &lt;<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>&gt;<br>
> To: <a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
> Sent: Monday, January 11, 2016 7:08:46 PM<br>
> Subject: [llvm] r257428 - CXX_FAST_TLS calling convention: performance improvement for x86-64.<br>
><br>
> Author: mren<br>
> Date: Mon Jan 11 19:08:46 2016<br>
> New Revision: 257428<br>
><br>
> URL: <a href="http://llvm.org/viewvc/llvm-project?rev=257428&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=257428&view=rev</a><br>
> Log:<br>
> CXX_FAST_TLS calling convention: performance improvement for x86-64.<br>
><br>
> This is the same change on x86-64 as r255821 on AArch64.<br>
> rdar://9001553<br>
><br>
> Modified:<br>
>     llvm/trunk/lib/Target/X86/X86CallingConv.td<br>
>     llvm/trunk/lib/Target/X86/X86FastISel.cpp<br>
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>
>     llvm/trunk/lib/Target/X86/X86ISelLowering.h<br>
>     llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h<br>
>     llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp<br>
>     llvm/trunk/lib/Target/X86/X86RegisterInfo.h<br>
>     llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX<br>
>  def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,<br>
>                                               R8, R9, R10, R11)>;<br>
><br>
> +// CSRs that are handled by prologue, epilogue.<br>
> +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;<br>
> +<br>
> +// CSRs that are handled explicitly via copies.<br>
> +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add<br>
> CSR_64_TLS_Darwin)>;<br>
> +<br>
>  // All GPRs - except r11<br>
>  def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX,<br>
>  RSI, RDI,<br>
>                                                R8, R9, R10, RSP)>;<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Ins<br>
>    if (!FuncInfo.CanLowerReturn)<br>
>      return false;<br>
><br>
> +  if (TLI.supportSplitCSR(FuncInfo.MF))<br>
> +    return false;<br>
> +<br>
>    CallingConv::ID CC = F.getCallingConv();<br>
>    if (CC != CallingConv::C &&<br>
>        CC != CallingConv::Fast &&<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -2311,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue C<br>
>          DAG.getRegister(RetValReg,<br>
>          getPointerTy(DAG.getDataLayout())));<br>
>    }<br>
><br>
> +  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();<br>
> +  const MCPhysReg *I =<br>
> +      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());<br>
> +  if (I) {<br>
> +    for (; *I; ++I) {<br>
> +      if (X86::GR64RegClass.contains(*I))<br>
> +        RetOps.push_back(DAG.getRegister(*I, MVT::i64));<br>
> +      else<br>
> +        llvm_unreachable("Unexpected register class in<br>
> CSRsViaCopy!");<br>
> +    }<br>
> +  }<br>
> +<br>
>    RetOps[0] = Chain;  // Update chain.<br>
><br>
>    // Add the flag if we have it.<br>
> @@ -28827,3 +28839,51 @@ bool X86TargetLowering::isIntDivCheap(EV<br>
>                                     Attribute::MinSize);<br>
>    return OptSize && !VT.isVector();<br>
>  }<br>
> +<br>
> +void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry)<br>
> const {<br>
> +  if (!Subtarget->is64Bit())<br>
> +    return;<br>
> +<br>
> +  // Update IsSplitCSR in X86MachineFunctionInfo.<br>
> +  X86MachineFunctionInfo *AFI =<br>
> +    Entry->getParent()->getInfo&lt;X86MachineFunctionInfo&gt;();<br>
> +  AFI->setIsSplitCSR(true);<br>
> +}<br>
> +<br>
> +void X86TargetLowering::insertCopiesSplitCSR(<br>
> +    MachineBasicBlock *Entry,<br>
> +    const SmallVectorImpl&lt;MachineBasicBlock *&gt; &Exits) const {<br>
> +  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();<br>
> +  const MCPhysReg *IStart =<br>
> TRI->getCalleeSavedRegsViaCopy(Entry->getParent());<br>
> +  if (!IStart)<br>
> +    return;<br>
> +<br>
> +  const TargetInstrInfo *TII = Subtarget->getInstrInfo();<br>
> +  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();<br>
> +  for (const MCPhysReg *I = IStart; *I; ++I) {<br>
> +    const TargetRegisterClass *RC = nullptr;<br>
> +    if (X86::GR64RegClass.contains(*I))<br>
> +      RC = &X86::GR64RegClass;<br>
> +    else<br>
> +      llvm_unreachable("Unexpected register class in CSRsViaCopy!");<br>
> +<br>
> +    unsigned NewVR = MRI->createVirtualRegister(RC);<br>
> +    // Create copy from CSR to a virtual register.<br>
> +    // FIXME: this currently does not emit CFI pseudo-instructions,<br>
> it works<br>
> +    // fine for CXX_FAST_TLS since the C++-style TLS access<br>
> functions should be<br>
> +    // nounwind. If we want to generalize this later, we may need to<br>
> emit<br>
> +    // CFI pseudo-instructions.<br>
> +    assert(Entry->getParent()->getFunction()->hasFnAttribute(<br>
> +               Attribute::NoUnwind) &&<br>
> +           "Function should be nounwind in insertCopiesSplitCSR!");<br>
> +    Entry->addLiveIn(*I);<br>
> +    BuildMI(*Entry, Entry->begin(), DebugLoc(),<br>
> TII->get(TargetOpcode::COPY),<br>
> +            NewVR)<br>
> +        .addReg(*I);<br>
> +<br>
> +    for (auto *Exit : Exits)<br>
> +      BuildMI(*Exit, Exit->begin(), DebugLoc(),<br>
> TII->get(TargetOpcode::COPY),<br>
> +              *I)<br>
> +          .addReg(NewVR);<br>
> +  }<br>
> +}<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -1057,6 +1057,15 @@ namespace llvm {<br>
>                          const SmallVectorImpl&lt;SDValue&gt; &OutVals,<br>
>                          SDLoc dl, SelectionDAG &DAG) const override;<br>
><br>
> +    bool supportSplitCSR(MachineFunction *MF) const override {<br>
> +      return MF->getFunction()->getCallingConv() ==<br>
> CallingConv::CXX_FAST_TLS &&<br>
> +          MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);<br>
> +    }<br>
> +    void initializeSplitCSR(MachineBasicBlock *Entry) const<br>
> override;<br>
> +    void insertCopiesSplitCSR(<br>
> +      MachineBasicBlock *Entry,<br>
> +      const SmallVectorImpl&lt;MachineBasicBlock *&gt; &Exits) const<br>
> override;<br>
> +<br>
>      bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const<br>
>      override;<br>
><br>
>      bool mayBeEmittedAsTailCall(CallInst *CI) const override;<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h Mon Jan 11<br>
> 19:08:46 2016<br>
> @@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public Ma<br>
>    /// used to address arguments in a function using a base pointer.<br>
>    int SEHFramePtrSaveIndex = 0;<br>
><br>
> +  /// True if this function has a subset of CSRs that is handled<br>
> explicitly via<br>
> +  /// copies.<br>
> +  bool IsSplitCSR = false;<br>
> +<br>
>  private:<br>
>    /// ForwardedMustTailRegParms - A list of virtual and physical<br>
>    registers<br>
>    /// that must be forwarded to every musttail call.<br>
> @@ -160,6 +164,9 @@ public:<br>
>    SmallVectorImpl&lt;ForwardedRegister&gt; &getForwardedMustTailRegParms()<br>
>    {<br>
>      return ForwardedMustTailRegParms;<br>
>    }<br>
> +<br>
> +  bool isSplitCSR() const { return IsSplitCSR; }<br>
> +  void setIsSplitCSR(bool s) { IsSplitCSR = s; }<br>
>  };<br>
><br>
>  } // End llvm namespace<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(cons<br>
>      return CSR_64_RT_AllRegs_SaveList;<br>
>    case CallingConv::CXX_FAST_TLS:<br>
>      if (Is64Bit)<br>
> -      return CSR_64_TLS_Darwin_SaveList;<br>
> +      return MF->getInfo&lt;X86MachineFunctionInfo&gt;()->isSplitCSR() ?<br>
> +             CSR_64_CXX_TLS_Darwin_PE_SaveList :<br>
> CSR_64_TLS_Darwin_SaveList;<br>
>      break;<br>
>    case CallingConv::Intel_OCL_BI: {<br>
>      if (HasAVX512 && IsWin64)<br>
> @@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(cons<br>
>    return CSR_32_SaveList;<br>
>  }<br>
><br>
> +const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(<br>
> +    const MachineFunction *MF) const {<br>
> +  assert(MF && "Invalid MachineFunction pointer.");<br>
> +  if (MF->getFunction()->getCallingConv() ==<br>
> CallingConv::CXX_FAST_TLS &&<br>
> +      MF->getInfo&lt;X86MachineFunctionInfo&gt;()->isSplitCSR())<br>
> +    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;<br>
> +  return nullptr;<br>
> +}<br>
> +<br>
>  const uint32_t *<br>
>  X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,<br>
>                                        CallingConv::ID CC) const {<br>
><br>
> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)<br>
> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -99,6 +99,8 @@ public:<br>
>    /// callee-save registers on this target.<br>
>    const MCPhysReg *<br>
>    getCalleeSavedRegs(const MachineFunction* MF) const override;<br>
> +  const MCPhysReg *<br>
> +  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const<br>
> override;<br>
>    const uint32_t *getCallPreservedMask(const MachineFunction &MF,<br>
>                                         CallingConv::ID) const<br>
>                                         override;<br>
>    const uint32_t *getNoPreservedMask() const override;<br>
><br>
> Modified: llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll<br>
> URL:<br>
> <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll?rev=257428&r1=257427&r2=257428&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll?rev=257428&r1=257427&r2=257428&view=diff</a><br>
> ==============================================================================<br>
> --- llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll (original)<br>
> +++ llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll Mon Jan 11 19:08:46<br>
> 2016<br>
> @@ -2,8 +2,8 @@<br>
>  ; TLS function were wrongly model and after fixing that,<br>
>  shrink-wrapping<br>
>  ; cannot help here. To achieve the expected lowering, we need to<br>
>  playing<br>
>  ; tricks similar to AArch64 fast TLS calling convention (r255821).<br>
> -; Re-enable the following run line when<br>
> -; _RUN_: llc < %s -mtriple=x86_64-apple-darwin<br>
> -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s<br>
> +; Applying tricks on x86-64 similar to r255821.<br>
> +; RUN: llc < %s -mtriple=x86_64-apple-darwin<br>
> -enable-shrink-wrap=true | FileCheck %s<br>
>  %struct.S = type { i8 }<br>
><br>
>  @sg = internal thread_local global %struct.S zeroinitializer, align<br>
>  1<br>
> @@ -16,51 +16,28 @@ declare i32 @_tlv_atexit(void (i8*)*, i8<br>
><br>
>  ; Every GPR should be saved - except rdi, rax, and rsp<br>
>  ; CHECK-LABEL: _ZTW2sg<br>
> -; CHECK: pushq %r11<br>
> -; CHECK: pushq %r10<br>
> -; CHECK: pushq %r9<br>
> -; CHECK: pushq %r8<br>
> -; CHECK: pushq %rsi<br>
> -; CHECK: pushq %rdx<br>
> -; CHECK: pushq %rcx<br>
> -; CHECK: pushq %rbx<br>
> +; CHECK-NOT: pushq %r11<br>
> +; CHECK-NOT: pushq %r10<br>
> +; CHECK-NOT: pushq %r9<br>
> +; CHECK-NOT: pushq %r8<br>
> +; CHECK-NOT: pushq %rsi<br>
> +; CHECK-NOT: pushq %rdx<br>
> +; CHECK-NOT: pushq %rcx<br>
> +; CHECK-NOT: pushq %rbx<br>
>  ; CHECK: callq<br>
>  ; CHECK: jne<br>
>  ; CHECK: callq<br>
>  ; CHECK: tlv_atexit<br>
>  ; CHECK: callq<br>
> -; CHECK: popq %rbx<br>
> -; CHECK: popq %rcx<br>
> -; CHECK: popq %rdx<br>
> -; CHECK: popq %rsi<br>
> -; CHECK: popq %r8<br>
> -; CHECK: popq %r9<br>
> -; CHECK: popq %r10<br>
> -; CHECK: popq %r11<br>
> -; SHRINK-LABEL: _ZTW2sg<br>
> -; SHRINK: callq<br>
> -; SHRINK: jne<br>
> -; SHRINK: pushq %r11<br>
> -; SHRINK: pushq %r10<br>
> -; SHRINK: pushq %r9<br>
> -; SHRINK: pushq %r8<br>
> -; SHRINK: pushq %rsi<br>
> -; SHRINK: pushq %rdx<br>
> -; SHRINK: pushq %rcx<br>
> -; SHRINK: pushq %rbx<br>
> -; SHRINK: callq<br>
> -; SHRINK: tlv_atexit<br>
> -; SHRINK: popq %rbx<br>
> -; SHRINK: popq %rcx<br>
> -; SHRINK: popq %rdx<br>
> -; SHRINK: popq %rsi<br>
> -; SHRINK: popq %r8<br>
> -; SHRINK: popq %r9<br>
> -; SHRINK: popq %r10<br>
> -; SHRINK: popq %r11<br>
> -; SHRINK: LBB{{.*}}:<br>
> -; SHRINK: callq<br>
> -define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {<br>
> +; CHECK-NOT: popq %rbx<br>
> +; CHECK-NOT: popq %rcx<br>
> +; CHECK-NOT: popq %rdx<br>
> +; CHECK-NOT: popq %rsi<br>
> +; CHECK-NOT: popq %r8<br>
> +; CHECK-NOT: popq %r9<br>
> +; CHECK-NOT: popq %r10<br>
> +; CHECK-NOT: popq %r11<br>
> +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {<br>
>    %.b.i = load i1, i1* @__tls_guard, align 1<br>
>    br i1 %.b.i, label %__tls_init.exit, label %init.i<br>
><br>
><br>
><br>
> _______________________________________________<br>
> llvm-commits mailing list<br>
> <a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
> <a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
><br>
<br>
</div></div><span class="HOEnZb"><font color="#888888">--<br>
Hal Finkel<br>
Assistant Computational Scientist<br>
Leadership Computing Facility<br>
Argonne National Laboratory<br>
</font></span></blockquote></div><br></div>