[llvm] r265781 - CXX_FAST_TLS calling convention: performance improvement for PPC64

Chuang-Yu Cheng via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 8 05:04:34 PDT 2016


Author: cycheng
Date: Fri Apr  8 07:04:32 2016
New Revision: 265781

URL: http://llvm.org/viewvc/llvm-project?rev=265781&view=rev
Log:
CXX_FAST_TLS calling convention: performance improvement for PPC64

This is the same change on PPC64 as r255821 on AArch64. I have even borrowed
his commit message.

The access function has a short entry and a short exit, the initialization
block is only run the first time. To improve the performance, we want to
have a short frame at the entry and exit.

We explicitly handle most of the CSRs via copies. Only the CSRs that are not
handled via copies will be in CSR_SaveList.

Frame lowering and prologue/epilogue insertion will generate a short frame
in the entry and exit according to CSR_SaveList. The majority of the CSRs will
be handled by register allcoator. Register allocator will try to spill and
reload them in the initialization block.

We add CSRsViaCopy, it will be explicitly handled during lowering.

1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target
   supports it for the given machine function and the function has only return
   exits). We also call TLI->initializeSplitCSR to perform initialization.
2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to
   virtual registers at beginning of the entry block and copies from virtual
   registers to CSRsViaCopy at beginning of the exit blocks.
3> we also need to make sure the explicit copies will not be eliminated.

Author: Tom Jablin (tjablin)
Reviewers: hfinkel kbarton cycheng

http://reviews.llvm.org/D17533

Added:
    llvm/trunk/test/CodeGen/PowerPC/cxx_tlscc64.ll
Modified:
    llvm/trunk/lib/CodeGen/TargetFrameLoweringImpl.cpp
    llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td
    llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
    llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h

Modified: llvm/trunk/lib/CodeGen/TargetFrameLoweringImpl.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetFrameLoweringImpl.cpp?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetFrameLoweringImpl.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetFrameLoweringImpl.cpp Fri Apr  8 07:04:32 2016
@@ -63,12 +63,15 @@ void TargetFrameLowering::determineCalle
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
 
+  // Resize before the early returns. Some backends expect that
+  // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
+  // saved registers.
+  SavedRegs.resize(TRI.getNumRegs());
+
   // Early exit if there are no callee saved registers.
   if (!CSRegs || CSRegs[0] == 0)
     return;
 
-  SavedRegs.resize(TRI.getNumRegs());
-
   // In Naked functions we aren't going to save any registers.
   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
     return;

Modified: llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td Fri Apr  8 07:04:32 2016
@@ -243,12 +243,23 @@ def CSR_SVR464   : CalleeSavedRegs<(add
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4
                                    )>;
 
+// CSRs that are handled by prologue, epilogue.
+def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
+
+def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
+
 def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
 
+def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
+
 def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
 
+def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+
 def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
 
+def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 
 def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),

Modified: llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCFastISel.cpp Fri Apr  8 07:04:32 2016
@@ -1597,6 +1597,9 @@ bool PPCFastISel::SelectRet(const Instru
   if (!FuncInfo.CanLowerReturn)
     return false;
 
+  if (TLI.supportSplitCSR(FuncInfo.MF))
+    return false;
+
   const ReturnInst *Ret = cast<ReturnInst>(I);
   const Function &F = *I->getParent()->getParent();
 

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Apr  8 07:04:32 2016
@@ -6029,6 +6029,25 @@ PPCTargetLowering::LowerReturn(SDValue C
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
 
+  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const MCPhysReg *I =
+    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (I) {
+    for (; *I; ++I) {
+
+      if (PPC::G8RCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+      else if (PPC::F8RCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+      else if (PPC::CRRCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
+      else if (PPC::VRRCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
+
   RetOps[0] = Chain;  // Update chain.
 
   // Add the flag if we have it.
@@ -11922,3 +11941,57 @@ PPCTargetLowering::createFastISel(Functi
                                   const TargetLibraryInfo *LibInfo) const {
   return PPC::createFastISel(FuncInfo, LibInfo);
 }
+
+void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+  if (Subtarget.isDarwinABI()) return;
+  if (!Subtarget.isPPC64()) return;
+
+  // Update IsSplitCSR in PPCFunctionInfo
+  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
+  PFI->setIsSplitCSR(true);
+}
+
+void PPCTargetLowering::insertCopiesSplitCSR(
+  MachineBasicBlock *Entry,
+  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!IStart)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  MachineBasicBlock::iterator MBBI = Entry->begin();
+  for (const MCPhysReg *I = IStart; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (PPC::G8RCRegClass.contains(*I))
+      RC = &PPC::G8RCRegClass;
+    else if (PPC::F8RCRegClass.contains(*I))
+      RC = &PPC::F8RCRegClass;
+    else if (PPC::CRRCRegClass.contains(*I))
+      RC = &PPC::CRRCRegClass;
+    else if (PPC::VRRCRegClass.contains(*I))
+      RC = &PPC::VRRCRegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions, it works
+    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+    // nounwind. If we want to generalize this later, we may need to emit
+    // CFI pseudo-instructions.
+    assert(Entry->getParent()->getFunction()->hasFnAttribute(
+             Attribute::NoUnwind) &&
+           "Function should be nounwind in insertCopiesSplitCSR!");
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
+      .addReg(*I);
+
+    // Insert the copy-back instructions right before the terminator
+    for (auto *Exit : Exits)
+      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+              TII->get(TargetOpcode::COPY), *I)
+        .addReg(NewVR);
+  }
+}

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Fri Apr  8 07:04:32 2016
@@ -442,6 +442,18 @@ namespace llvm {
       return true;
     }
 
+    bool supportSplitCSR(MachineFunction *MF) const override {
+      return
+        MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+        MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+    }
+
+    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+
+    void insertCopiesSplitCSR(
+      MachineBasicBlock *Entry,
+      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;

Modified: llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h Fri Apr  8 07:04:32 2016
@@ -104,6 +104,10 @@ class PPCFunctionInfo : public MachineFu
   /// Whether this uses the PIC Base register or not.
   bool UsesPICBase;
 
+  /// True if this function has a subset of CSRs that is handled explicitly via
+  /// copies
+  bool IsSplitCSR;
+
 public:
   explicit PPCFunctionInfo(MachineFunction &MF) 
     : FramePointerSaveIndex(0),
@@ -125,7 +129,8 @@ public:
       VarArgsNumFPR(0),
       CRSpillFrameIndex(0),
       MF(MF),
-      UsesPICBase(0) {}
+      UsesPICBase(0),
+      IsSplitCSR(false) {}
 
   int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
   void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -196,6 +201,9 @@ public:
   void setUsesPICBase(bool uses) { UsesPICBase = uses; }
   bool usesPICBase() const { return UsesPICBase; }
 
+  bool isSplitCSR() const { return IsSplitCSR; }
+  void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
   MCSymbol *getPICOffsetSymbol() const;
 
   MCSymbol *getGlobalEPSymbol() const;

Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Fri Apr  8 07:04:32 2016
@@ -116,6 +116,9 @@ PPCRegisterInfo::getCalleeSavedRegs(cons
                : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
                                          : CSR_Darwin32_SaveList);
 
+  if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+    return CSR_SRV464_TLS_PE_SaveList;
+
   // On PPC64, we might need to save r2 (but only if it is not reserved).
   bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
 
@@ -128,6 +131,31 @@ PPCRegisterInfo::getCalleeSavedRegs(cons
                                        : CSR_SVR432_SaveList);
 }
 
+const MCPhysReg *
+PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+  if (Subtarget.isDarwinABI())
+    return nullptr;
+  if (!TM.isPPC64())
+    return nullptr;
+  if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS)
+    return nullptr;
+  if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+    return nullptr;
+
+  // On PPC64, we might need to save r2 (but only if it is not reserved).
+  bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2);
+  if (Subtarget.hasAltivec())
+    return SaveR2
+      ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList
+      : CSR_SVR464_Altivec_ViaCopy_SaveList;
+  else
+    return SaveR2
+      ? CSR_SVR464_R2_ViaCopy_SaveList
+      : CSR_SVR464_ViaCopy_SaveList;
+}
+
 const uint32_t *
 PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const {

Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h?rev=265781&r1=265780&r2=265781&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h Fri Apr  8 07:04:32 2016
@@ -75,6 +75,7 @@ public:
 
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+  const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID CC) const override;
   const uint32_t *getNoPreservedMask() const override;

Added: llvm/trunk/test/CodeGen/PowerPC/cxx_tlscc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/cxx_tlscc64.ll?rev=265781&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/cxx_tlscc64.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/cxx_tlscc64.ll Fri Apr  8 07:04:32 2016
@@ -0,0 +1,43 @@
+; RUN: llc < %s --enable-shrink-wrap=false -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
+%struct.S = type { i8 }
+
+ at sg = internal thread_local global %struct.S zeroinitializer, align 1
+ at __dso_handle = external global i8
+ at __tls_guard = internal thread_local unnamed_addr global i1 false
+ at sum1 = internal thread_local global i32 0, align 4
+
+declare void @_ZN1SC1Ev(%struct.S*)
+declare void @_ZN1SD1Ev(%struct.S*)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+; CHECK-LABEL: _ZTW2sg
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
+  %.b.i = load i1, i1* @__tls_guard, align 1
+; CHECK: bc 12, 1, [[BB_end:.?LBB0_[0-9]+]]
+  br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+init.i:
+; CHECK: Folded Spill
+  store i1 true, i1* @__tls_guard, align 1
+  tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
+; CHECK: bl _ZN1SC1Ev
+  %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
+; CHECK: Folded Reload
+; CHECK: _tlv_atexit
+  br label %__tls_init.exit
+
+; CHECK: [[BB_end]]:
+__tls_init.exit:
+  ret %struct.S* @sg
+}
+
+; CHECK-LABEL: _ZTW4sum1
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}
+
+define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
+  ret i32* @sum1
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
\ No newline at end of file




More information about the llvm-commits mailing list