[llvm] r257424 - CXX_FAST_TLS calling convention: performance improvement for ARM.

Manman Ren via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 11 16:47:19 PST 2016


Author: mren
Date: Mon Jan 11 18:47:18 2016
New Revision: 257424

URL: http://llvm.org/viewvc/llvm-project?rev=257424&view=rev
Log:
CXX_FAST_TLS calling convention: performance improvement for ARM.

This is the same change on ARM as r255821 on AArch64.
rdar://9001553

Modified:
    llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h
    llvm/trunk/lib/Target/ARM/ARMCallingConv.td
    llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h
    llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll

Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Mon Jan 11 18:47:18 2016
@@ -88,10 +88,21 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(
   }
 
   if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
-    return CSR_iOS_CXX_TLS_SaveList;
+    return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
+               ? CSR_iOS_CXX_TLS_PE_SaveList
+               : CSR_iOS_CXX_TLS_SaveList;
   return RegList;
 }
 
+const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
+    const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
+    return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
+  return nullptr;
+}
+
 const uint32_t *
 ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {

Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h Mon Jan 11 18:47:18 2016
@@ -98,6 +98,8 @@ protected:
 public:
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+  const MCPhysReg *
+  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const override;
   const uint32_t *getNoPreservedMask() const override;

Modified: llvm/trunk/lib/Target/ARM/ARMCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCallingConv.td?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallingConv.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMCallingConv.td Mon Jan 11 18:47:18 2016
@@ -234,6 +234,12 @@ def CSR_iOS_TLSCall : CalleeSavedRegs<(a
 def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
                                            (sequence "D%u", 31, 0))>;
 
+// CSRs that are handled by the prologue and epilogue.
+def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>;
+
 // The "interrupt" attribute is used to generate code that is acceptable in
 // exception-handlers of various kinds. It makes us use a different return
 // instruction (handled elsewhere) and affects which registers we must return to

Modified: llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFastISel.cpp?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFastISel.cpp Mon Jan 11 18:47:18 2016
@@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instru
   if (!FuncInfo.CanLowerReturn)
     return false;
 
+  if (TLI.supportSplitCSR(FuncInfo.MF))
+    return false;
+
   // Build a list of return value registers.
   SmallVector<unsigned, 4> RetRegs;
 

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Jan 11 18:47:18 2016
@@ -2348,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue C
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
+  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *I =
+      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (I) {
+    for (; *I; ++I) {
+      if (ARM::GPRRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i32));
+      else if (ARM::DPRRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
 
   // Update chain and glue.
   RetOps[0] = Chain;
@@ -12393,3 +12406,49 @@ unsigned ARMTargetLowering::getException
   // via the personality function.
   return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
 }
+
+void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+  // Update IsSplitCSR in ARMFunctionInfo.
+  ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
+  AFI->setIsSplitCSR(true);
+}
+
+void ARMTargetLowering::insertCopiesSplitCSR(
+    MachineBasicBlock *Entry,
+    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!IStart)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  for (const MCPhysReg *I = IStart; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (ARM::GPRRegClass.contains(*I))
+      RC = &ARM::GPRRegClass;
+    else if (ARM::DPRRegClass.contains(*I))
+      RC = &ARM::DPRRegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions. That is
+    // fine for CXX_FAST_TLS, since the C++-style TLS access functions should
+    // be nounwind. If we want to generalize this later, we may need to emit
+    // CFI pseudo-instructions.
+    assert(Entry->getParent()->getFunction()->hasFnAttribute(
+               Attribute::NoUnwind) &&
+           "Function should be nounwind in insertCopiesSplitCSR!");
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+            NewVR)
+        .addReg(*I);
+
+    for (auto *Exit : Exits)
+      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+              *I)
+          .addReg(NewVR);
+  }
+}

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Jan 11 18:47:18 2016
@@ -580,6 +580,15 @@ namespace llvm {
                             SmallVectorImpl<SDValue> &InVals,
                             bool isThisReturn, SDValue ThisVal) const;
 
+    bool supportSplitCSR(MachineFunction *MF) const override {
+      return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+          MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+    }
+    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+    void insertCopiesSplitCSR(
+      MachineBasicBlock *Entry,
+      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
     SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,

Modified: llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.cpp?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.cpp Mon Jan 11 18:47:18 2016
@@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(Machine
       RestoreSPFromFP(false), LRSpilledForFarJump(false),
       FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
       GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
-      PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
+      PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+      IsSplitCSR(false) {}

Modified: llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMMachineFunctionInfo.h Mon Jan 11 18:47:18 2016
@@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFu
   /// coalesced weights.
   DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
 
+  /// True if this function has a subset of CSRs that is handled explicitly via
+  /// copies.
+  bool IsSplitCSR;
+
 public:
   ARMFunctionInfo() :
     isThumb(false),
@@ -128,7 +132,7 @@ public:
     FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
     GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
     NumAlignedDPRCS2Regs(0), PICLabelUId(0),
-    VarArgsFrameIndex(0), HasITBlocks(false) {}
+    VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
 
   explicit ARMFunctionInfo(MachineFunction &MF);
 
@@ -199,6 +203,9 @@ public:
   bool hasITBlocks() const { return HasITBlocks; }
   void setHasITBlocks(bool h) { HasITBlocks = h; }
 
+  bool isSplitCSR() const { return IsSplitCSR; }
+  void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
   void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
     if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
       llvm_unreachable("Duplicate entries!");

Modified: llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll?rev=257424&r1=257423&r2=257424&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/cxx-tlscc.ll Mon Jan 11 18:47:18 2016
@@ -28,17 +28,19 @@ __tls_init.exit:
 }
 
 ; CHECK-LABEL: _ZTW2sg
-; CHECK: push {r1, r2, r3, r4, r7, lr}
-; CHECK: push {r9, r12}
-; CHECK: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
-; CHECK: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK: push {lr}
+; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
+; CHECK-NOT: push {r9, r12}
+; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
 ; CHECK: blx
 ; CHECK: bne [[BB_end:.?LBB0_[0-9]+]]
 ; CHECK; blx
 ; CHECK: tlv_atexit
 ; CHECK: [[BB_end]]:
 ; CHECK: blx
-; CHECK: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
-; CHECK: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
-; CHECK: pop {r9, r12}
-; CHECK: pop {r1, r2, r3, r4, r7, pc}
+; CHECK-NOT: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK-NOT: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-NOT: pop {r9, r12}
+; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
+; CHECK: pop {lr}




More information about the llvm-commits mailing list