[llvm] r257428 - CXX_FAST_TLS calling convention: performance improvement for x86-64.

Manman Ren via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 11 17:08:46 PST 2016


Author: mren
Date: Mon Jan 11 19:08:46 2016
New Revision: 257428

URL: http://llvm.org/viewvc/llvm-project?rev=257428&view=rev
Log:
CXX_FAST_TLS calling convention: performance improvement for x86-64.

This is the same change on x86-64 as r255821 on AArch64.
rdar://9001553

Modified:
    llvm/trunk/lib/Target/X86/X86CallingConv.td
    llvm/trunk/lib/Target/X86/X86FastISel.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
    llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
    llvm/trunk/lib/Target/X86/X86RegisterInfo.h
    llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Jan 11 19:08:46 2016
@@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX
 def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
                                              R8, R9, R10, R11)>;
 
+// CSRs that are handled by prologue, epilogue.
+def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
+
 // All GPRs - except r11
 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
                                               R8, R9, R10, RSP)>;

Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Jan 11 19:08:46 2016
@@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Ins
   if (!FuncInfo.CanLowerReturn)
     return false;
 
+  if (TLI.supportSplitCSR(FuncInfo.MF))
+    return false;
+
   CallingConv::ID CC = F.getCallingConv();
   if (CC != CallingConv::C &&
       CC != CallingConv::Fast &&

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 11 19:08:46 2016
@@ -2311,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue C
         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
   }
 
+  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *I =
+      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (I) {
+    for (; *I; ++I) {
+      if (X86::GR64RegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
+
   RetOps[0] = Chain;  // Update chain.
 
   // Add the flag if we have it.
@@ -28827,3 +28839,51 @@ bool X86TargetLowering::isIntDivCheap(EV
                                    Attribute::MinSize);
   return OptSize && !VT.isVector();
 }
+
+void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+  if (!Subtarget->is64Bit())
+    return;
+
+  // Update IsSplitCSR in X86MachineFunctionInfo.
+  X86MachineFunctionInfo *AFI =
+    Entry->getParent()->getInfo<X86MachineFunctionInfo>();
+  AFI->setIsSplitCSR(true);
+}
+
+void X86TargetLowering::insertCopiesSplitCSR(
+    MachineBasicBlock *Entry,
+    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!IStart)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  for (const MCPhysReg *I = IStart; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (X86::GR64RegClass.contains(*I))
+      RC = &X86::GR64RegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions. It works
+    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+    // nounwind. If we want to generalize this later, we may need to emit
+    // CFI pseudo-instructions.
+    assert(Entry->getParent()->getFunction()->hasFnAttribute(
+               Attribute::NoUnwind) &&
+           "Function should be nounwind in insertCopiesSplitCSR!");
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+            NewVR)
+        .addReg(*I);
+
+    for (auto *Exit : Exits)
+      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+              *I)
+          .addReg(NewVR);
+  }
+}

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Jan 11 19:08:46 2016
@@ -1057,6 +1057,15 @@ namespace llvm {
                         const SmallVectorImpl<SDValue> &OutVals,
                         SDLoc dl, SelectionDAG &DAG) const override;
 
+    bool supportSplitCSR(MachineFunction *MF) const override {
+      return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+          MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+    }
+    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+    void insertCopiesSplitCSR(
+      MachineBasicBlock *Entry,
+      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
 
     bool mayBeEmittedAsTailCall(CallInst *CI) const override;

Modified: llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86MachineFunctionInfo.h Mon Jan 11 19:08:46 2016
@@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public Ma
   /// used to address arguments in a function using a base pointer.
   int SEHFramePtrSaveIndex = 0;
 
+  /// True if this function has a subset of CSRs that is handled explicitly via
+  /// copies.
+  bool IsSplitCSR = false;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -160,6 +164,9 @@ public:
   SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
     return ForwardedMustTailRegParms;
   }
+
+  bool isSplitCSR() const { return IsSplitCSR; }
+  void setIsSplitCSR(bool s) { IsSplitCSR = s; }
 };
 
 } // End llvm namespace

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Mon Jan 11 19:08:46 2016
@@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(cons
     return CSR_64_RT_AllRegs_SaveList;
   case CallingConv::CXX_FAST_TLS:
     if (Is64Bit)
-      return CSR_64_TLS_Darwin_SaveList;
+      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
+             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
     break;
   case CallingConv::Intel_OCL_BI: {
     if (HasAVX512 && IsWin64)
@@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(cons
   return CSR_32_SaveList;
 }
 
+const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
+    const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
+    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
+  return nullptr;
+}
+
 const uint32_t *
 X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const {

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Mon Jan 11 19:08:46 2016
@@ -99,6 +99,8 @@ public:
   /// callee-save registers on this target.
   const MCPhysReg *
   getCalleeSavedRegs(const MachineFunction* MF) const override;
+  const MCPhysReg *
+  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const override;
   const uint32_t *getNoPreservedMask() const override;

Modified: llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll?rev=257428&r1=257427&r2=257428&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cxx_tlscc64.ll Mon Jan 11 19:08:46 2016
@@ -2,8 +2,8 @@
 ; TLS function were wrongly model and after fixing that, shrink-wrapping
 ; cannot help here. To achieve the expected lowering, we need to playing
 ; tricks similar to AArch64 fast TLS calling convention (r255821).
-; Re-enable the following run line when 
-; _RUN_: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s
+; Applying tricks on x86-64 similar to r255821.
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
 %struct.S = type { i8 }
 
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
@@ -16,51 +16,28 @@ declare i32 @_tlv_atexit(void (i8*)*, i8
 
 ; Every GPR should be saved - except rdi, rax, and rsp
 ; CHECK-LABEL: _ZTW2sg
-; CHECK: pushq %r11
-; CHECK: pushq %r10
-; CHECK: pushq %r9
-; CHECK: pushq %r8
-; CHECK: pushq %rsi
-; CHECK: pushq %rdx
-; CHECK: pushq %rcx
-; CHECK: pushq %rbx
+; CHECK-NOT: pushq %r11
+; CHECK-NOT: pushq %r10
+; CHECK-NOT: pushq %r9
+; CHECK-NOT: pushq %r8
+; CHECK-NOT: pushq %rsi
+; CHECK-NOT: pushq %rdx
+; CHECK-NOT: pushq %rcx
+; CHECK-NOT: pushq %rbx
 ; CHECK: callq
 ; CHECK: jne
 ; CHECK: callq
 ; CHECK: tlv_atexit
 ; CHECK: callq
-; CHECK: popq %rbx
-; CHECK: popq %rcx
-; CHECK: popq %rdx
-; CHECK: popq %rsi
-; CHECK: popq %r8
-; CHECK: popq %r9
-; CHECK: popq %r10
-; CHECK: popq %r11
-; SHRINK-LABEL: _ZTW2sg
-; SHRINK: callq
-; SHRINK: jne
-; SHRINK: pushq %r11
-; SHRINK: pushq %r10
-; SHRINK: pushq %r9
-; SHRINK: pushq %r8
-; SHRINK: pushq %rsi
-; SHRINK: pushq %rdx
-; SHRINK: pushq %rcx
-; SHRINK: pushq %rbx
-; SHRINK: callq
-; SHRINK: tlv_atexit
-; SHRINK: popq %rbx
-; SHRINK: popq %rcx
-; SHRINK: popq %rdx
-; SHRINK: popq %rsi
-; SHRINK: popq %r8
-; SHRINK: popq %r9
-; SHRINK: popq %r10
-; SHRINK: popq %r11
-; SHRINK: LBB{{.*}}:
-; SHRINK: callq
-define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
+; CHECK-NOT: popq %rbx
+; CHECK-NOT: popq %rcx
+; CHECK-NOT: popq %rdx
+; CHECK-NOT: popq %rsi
+; CHECK-NOT: popq %r8
+; CHECK-NOT: popq %r9
+; CHECK-NOT: popq %r10
+; CHECK-NOT: popq %r11
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
   %.b.i = load i1, i1* @__tls_guard, align 1
   br i1 %.b.i, label %__tls_init.exit, label %init.i
 




More information about the llvm-commits mailing list