[llvm] r257428 - CXX_FAST_TLS calling convention: performance improvement for x86-64.

Hal Finkel via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 14 15:28:38 PST 2016


Hi Manman,

It seems this commit (r257428) caused https://llvm.org/bugs/show_bug.cgi?id=26136

 -Hal

----- Original Message -----
> From: "Manman Ren via llvm-commits" <llvm-commits at lists.llvm.org>
> To: llvm-commits at lists.llvm.org
> Sent: Monday, January 11, 2016 7:08:46 PM
> Subject: [llvm] r257428 - CXX_FAST_TLS calling convention: performance improvement for x86-64.
> 
> Author: mren
> Date: Mon Jan 11 19:08:46 2016
> New Revision: 257428
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=257428&view=rev
> Log:
> CXX_FAST_TLS calling convention: performance improvement for x86-64.
> 
> This is the same change on x86-64 as r255821 on AArch64.
> rdar://9001553
> 
> Modified:
>     llvm/trunk/lib/Target/X86/X86CallingConv.td
>     llvm/trunk/lib/Target/X86/X86FastISel.cpp
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/lib/Target/X86/X86ISelLowering.h
>     llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
>     llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
>     llvm/trunk/lib/Target/X86/X86RegisterInfo.h
>     llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
> +++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Jan 11 19:08:46
> 2016
> @@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX
>  def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
>                                               R8, R9, R10, R11)>;
>  
> +// CSRs that are handled by prologue, epilogue.
> +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
> +
> +// CSRs that are handled explicitly via copies.
> +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
> +
>  // All GPRs - except r11
>  def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX,
>  RSI, RDI,
>                                                R8, R9, R10, RSP)>;
> 
> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Jan 11 19:08:46
> 2016
> @@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Ins
>    if (!FuncInfo.CanLowerReturn)
>      return false;
>  
> +  if (TLI.supportSplitCSR(FuncInfo.MF))
> +    return false;
> +
>    CallingConv::ID CC = F.getCallingConv();
>    if (CC != CallingConv::C &&
>        CC != CallingConv::Fast &&
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 11 19:08:46
> 2016
> @@ -2311,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue C
>          DAG.getRegister(RetValReg,
>          getPointerTy(DAG.getDataLayout())));
>    }
>  
> +  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
> +  const MCPhysReg *I =
> +      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
> +  if (I) {
> +    for (; *I; ++I) {
> +      if (X86::GR64RegClass.contains(*I))
> +        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
> +      else
> +        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
> +    }
> +  }
> +
>    RetOps[0] = Chain;  // Update chain.
>  
>    // Add the flag if we have it.
> @@ -28827,3 +28839,51 @@ bool X86TargetLowering::isIntDivCheap(EV
>                                     Attribute::MinSize);
>    return OptSize && !VT.isVector();
>  }
> +
> +void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
> +  if (!Subtarget->is64Bit())
> +    return;
> +
> +  // Update IsSplitCSR in X86MachineFunctionInfo.
> +  X86MachineFunctionInfo *AFI =
> +    Entry->getParent()->getInfo<X86MachineFunctionInfo>();
> +  AFI->setIsSplitCSR(true);
> +}
> +
> +void X86TargetLowering::insertCopiesSplitCSR(
> +    MachineBasicBlock *Entry,
> +    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
> +  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
> +  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
> +  if (!IStart)
> +    return;
> +
> +  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
> +  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
> +  for (const MCPhysReg *I = IStart; *I; ++I) {
> +    const TargetRegisterClass *RC = nullptr;
> +    if (X86::GR64RegClass.contains(*I))
> +      RC = &X86::GR64RegClass;
> +    else
> +      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
> +
> +    unsigned NewVR = MRI->createVirtualRegister(RC);
> +    // Create copy from CSR to a virtual register.
> +    // FIXME: this currently does not emit CFI pseudo-instructions, it works
> +    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
> +    // nounwind. If we want to generalize this later, we may need to emit
> +    // CFI pseudo-instructions.
> +    assert(Entry->getParent()->getFunction()->hasFnAttribute(
> +               Attribute::NoUnwind) &&
> +           "Function should be nounwind in insertCopiesSplitCSR!");
> +    Entry->addLiveIn(*I);
> +    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
> +            NewVR)
> +        .addReg(*I);
> +
> +    for (auto *Exit : Exits)
> +      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
> +              *I)
> +          .addReg(NewVR);
> +  }
> +}
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Jan 11 19:08:46
> 2016
> @@ -1057,6 +1057,15 @@ namespace llvm {
>                          const SmallVectorImpl<SDValue> &OutVals,
>                          SDLoc dl, SelectionDAG &DAG) const override;
>  
> +    bool supportSplitCSR(MachineFunction *MF) const override {
> +      return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
> +          MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
> +    }
> +    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
> +    void insertCopiesSplitCSR(
> +      MachineBasicBlock *Entry,
> +      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
> +
>      bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const
>      override;
>  
>      bool mayBeEmittedAsTailCall(CallInst *CI) const override;
> 
> Modified: llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h Mon Jan 11
> 19:08:46 2016
> @@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public Ma
>    /// used to address arguments in a function using a base pointer.
>    int SEHFramePtrSaveIndex = 0;
>  
> +  /// True if this function has a subset of CSRs that is handled explicitly via
> +  /// copies.
> +  bool IsSplitCSR = false;
> +
>  private:
>    /// ForwardedMustTailRegParms - A list of virtual and physical
>    registers
>    /// that must be forwarded to every musttail call.
> @@ -160,6 +164,9 @@ public:
>    SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms()
>    {
>      return ForwardedMustTailRegParms;
>    }
> +
> +  bool isSplitCSR() const { return IsSplitCSR; }
> +  void setIsSplitCSR(bool s) { IsSplitCSR = s; }
>  };
>  
>  } // End llvm namespace
> 
> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Jan 11 19:08:46
> 2016
> @@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(cons
>      return CSR_64_RT_AllRegs_SaveList;
>    case CallingConv::CXX_FAST_TLS:
>      if (Is64Bit)
> -      return CSR_64_TLS_Darwin_SaveList;
> +      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
> +             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
>      break;
>    case CallingConv::Intel_OCL_BI: {
>      if (HasAVX512 && IsWin64)
> @@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(cons
>    return CSR_32_SaveList;
>  }
>  
> +const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
> +    const MachineFunction *MF) const {
> +  assert(MF && "Invalid MachineFunction pointer.");
> +  if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
> +      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
> +    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
> +  return nullptr;
> +}
> +
>  const uint32_t *
>  X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
>                                        CallingConv::ID CC) const {
> 
> Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Mon Jan 11 19:08:46
> 2016
> @@ -99,6 +99,8 @@ public:
>    /// callee-save registers on this target.
>    const MCPhysReg *
>    getCalleeSavedRegs(const MachineFunction* MF) const override;
> +  const MCPhysReg *
> +  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
>    const uint32_t *getCallPreservedMask(const MachineFunction &MF,
>                                         CallingConv::ID) const
>                                         override;
>    const uint32_t *getNoPreservedMask() const override;
> 
> Modified: llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll?rev=257428&r1=257427&r2=257428&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll Mon Jan 11 19:08:46
> 2016
> @@ -2,8 +2,8 @@
>  ; TLS function were wrongly model and after fixing that,
>  shrink-wrapping
>  ; cannot help here. To achieve the expected lowering, we need to
>  playing
>  ; tricks similar to AArch64 fast TLS calling convention (r255821).
> -; Re-enable the following run line when
> -; _RUN_: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s
> +; Applying tricks on x86-64 similar to r255821.
> +; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
>  %struct.S = type { i8 }
>  
>  @sg = internal thread_local global %struct.S zeroinitializer, align
>  1
> @@ -16,51 +16,28 @@ declare i32 @_tlv_atexit(void (i8*)*, i8
>  
>  ; Every GPR should be saved - except rdi, rax, and rsp
>  ; CHECK-LABEL: _ZTW2sg
> -; CHECK: pushq %r11
> -; CHECK: pushq %r10
> -; CHECK: pushq %r9
> -; CHECK: pushq %r8
> -; CHECK: pushq %rsi
> -; CHECK: pushq %rdx
> -; CHECK: pushq %rcx
> -; CHECK: pushq %rbx
> +; CHECK-NOT: pushq %r11
> +; CHECK-NOT: pushq %r10
> +; CHECK-NOT: pushq %r9
> +; CHECK-NOT: pushq %r8
> +; CHECK-NOT: pushq %rsi
> +; CHECK-NOT: pushq %rdx
> +; CHECK-NOT: pushq %rcx
> +; CHECK-NOT: pushq %rbx
>  ; CHECK: callq
>  ; CHECK: jne
>  ; CHECK: callq
>  ; CHECK: tlv_atexit
>  ; CHECK: callq
> -; CHECK: popq %rbx
> -; CHECK: popq %rcx
> -; CHECK: popq %rdx
> -; CHECK: popq %rsi
> -; CHECK: popq %r8
> -; CHECK: popq %r9
> -; CHECK: popq %r10
> -; CHECK: popq %r11
> -; SHRINK-LABEL: _ZTW2sg
> -; SHRINK: callq
> -; SHRINK: jne
> -; SHRINK: pushq %r11
> -; SHRINK: pushq %r10
> -; SHRINK: pushq %r9
> -; SHRINK: pushq %r8
> -; SHRINK: pushq %rsi
> -; SHRINK: pushq %rdx
> -; SHRINK: pushq %rcx
> -; SHRINK: pushq %rbx
> -; SHRINK: callq
> -; SHRINK: tlv_atexit
> -; SHRINK: popq %rbx
> -; SHRINK: popq %rcx
> -; SHRINK: popq %rdx
> -; SHRINK: popq %rsi
> -; SHRINK: popq %r8
> -; SHRINK: popq %r9
> -; SHRINK: popq %r10
> -; SHRINK: popq %r11
> -; SHRINK: LBB{{.*}}:
> -; SHRINK: callq
> -define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
> +; CHECK-NOT: popq %rbx
> +; CHECK-NOT: popq %rcx
> +; CHECK-NOT: popq %rdx
> +; CHECK-NOT: popq %rsi
> +; CHECK-NOT: popq %r8
> +; CHECK-NOT: popq %r9
> +; CHECK-NOT: popq %r10
> +; CHECK-NOT: popq %r11
> +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
>    %.b.i = load i1, i1* @__tls_guard, align 1
>    br i1 %.b.i, label %__tls_init.exit, label %init.i
>  
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory


More information about the llvm-commits mailing list