[llvm] r365232 - [PowerPC] Move TOC save to prologue when profitable

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 5 11:38:09 PDT 2019


Author: nemanjai
Date: Fri Jul  5 11:38:09 2019
New Revision: 365232

URL: http://llvm.org/viewvc/llvm-project?rev=365232&view=rev
Log:
[PowerPC] Move TOC save to prologue when profitable

The indirect call sequence on PPC requires that the TOC base register be saved
prior to the indirect call and restored after the call since the indirect call
may branch to a global entry point in another DSO which will update the TOC
base. Over the last couple of years, we have improved this to:

- be able to hoist TOC saves from loops (with changes to MachineLICM)
- avoid multiple saves when one dominates the other[s]

However, it is still possible for multiple TOC saves to execute dynamically
along a single execution path if there is no dominance relationship between
them.

This patch moves the TOC save to the prologue when one of the TOC saves is in a
block that post-dominates entry (i.e. it cannot be avoided) or if it is in a
block that is hotter than entry.

Differential revision: https://reviews.llvm.org/D63803

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
    llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
    llvm/trunk/test/CodeGen/PowerPC/remove-redundant-toc-saves.ll
    llvm/trunk/test/CodeGen/PowerPC/tocSaveInPrologue.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp?rev=365232&r1=365231&r2=365232&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp Fri Jul  5 11:38:09 2019
@@ -464,6 +464,7 @@ PPCFrameLowering::determineFrameLayout(c
                                        bool UseEstimate,
                                        unsigned *NewMaxCallFrameSize) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
 
   // Get the number of bytes to allocate from the FrameInfo
   unsigned FrameSize =
@@ -481,6 +482,7 @@ PPCFrameLowering::determineFrameLayout(c
   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                        !MFI.adjustsStack() &&       // No calls.
                        !MustSaveLR(MF, LR) &&       // No need to save LR.
+                       !FI->mustSaveTOC() &&        // No need to save TOC.
                        !RegInfo->hasBasePointer(MF); // No special alignment.
 
   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
@@ -808,6 +810,7 @@ void PPCFrameLowering::emitPrologue(Mach
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  bool MustSaveTOC = FI->mustSaveTOC();
   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
   bool MustSaveCR = !MustSaveCRs.empty();
   // Do we have a frame pointer and/or base pointer for this function?
@@ -819,6 +822,7 @@ void PPCFrameLowering::emitPrologue(Mach
   unsigned BPReg       = RegInfo->getBaseRegister(MF);
   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
+  unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
   unsigned ScratchReg  = 0;
   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
@@ -1092,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(Mach
     HasSTUX = true;
   }
 
+  // Save the TOC register after the stack pointer update if a prologue TOC
+  // save is required for the function.
+  if (MustSaveTOC) {
+    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+      .addReg(TOCReg, getKillRegState(true))
+      .addImm(TOCSaveOffset)
+      .addReg(SPReg);
+  }
+
   if (!HasRedZone) {
     assert(!isPPC64 && "A red zone is always available on PPC64");
     if (HasSTUX) {
@@ -1293,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(Mach
       if (PPC::CRBITRCRegClass.contains(Reg))
         continue;
 
+      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+        continue;
+
       // For SVR4, don't emit a move for the CR spill slot if we haven't
       // spilled CRs.
       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
@@ -1839,11 +1856,13 @@ void PPCFrameLowering::processFunctionBe
   unsigned MinFPR = PPC::F31;
   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
 
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool HasGPSaveArea = false;
   bool HasG8SaveArea = false;
   bool HasFPSaveArea = false;
   bool HasVRSAVESaveArea = false;
   bool HasVRSaveArea = false;
+  bool MustSaveTOC = FI->mustSaveTOC();
 
   SmallVector<CalleeSavedInfo, 18> GPRegs;
   SmallVector<CalleeSavedInfo, 18> G8Regs;
@@ -1852,6 +1871,8 @@ void PPCFrameLowering::processFunctionBe
 
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
+    assert((!MustSaveTOC || (Reg != PPC::X2 && Reg != PPC::R2)) &&
+           "Not expecting to try to spill R2 in a function that must save TOC");
     if (PPC::GPRCRegClass.contains(Reg) ||
         PPC::SPE4RCRegClass.contains(Reg)) {
       HasGPSaveArea = true;
@@ -2161,6 +2182,8 @@ PPCFrameLowering::spillCalleeSavedRegist
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  bool MustSaveTOC = FI->mustSaveTOC();
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
@@ -2191,6 +2214,10 @@ PPCFrameLowering::spillCalleeSavedRegist
       continue;
     }
 
+    // The actual spill will happen in the prologue.
+    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+      continue;
+
     // Insert the spill to the stack frame.
     if (IsCRField) {
       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
@@ -2318,6 +2345,8 @@ PPCFrameLowering::restoreCalleeSavedRegi
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  bool MustSaveTOC = FI->mustSaveTOC();
   bool CR2Spilled = false;
   bool CR3Spilled = false;
   bool CR4Spilled = false;
@@ -2340,6 +2369,9 @@ PPCFrameLowering::restoreCalleeSavedRegi
     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
       continue;
 
+    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+      continue;
+
     if (Reg == PPC::CR2) {
       CR2Spilled = true;
       // The spill slot is associated only with CR2, which is the

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=365232&r1=365231&r2=365232&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Fri Jul  5 11:38:09 2019
@@ -21,9 +21,12 @@
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -37,6 +40,7 @@ using namespace llvm;
 STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
 STATISTIC(MultiTOCSaves,
           "Number of functions with multiple TOC saves that must be kept");
+STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
@@ -84,6 +88,9 @@ struct PPCMIPeephole : public MachineFun
 
 private:
   MachineDominatorTree *MDT;
+  MachinePostDominatorTree *MPDT;
+  MachineBlockFrequencyInfo *MBFI;
+  uint64_t EntryFreq;
 
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
@@ -102,7 +109,11 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachinePostDominatorTree>();
+    AU.addRequired<MachineBlockFrequencyInfo>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addPreserved<MachinePostDominatorTree>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -120,6 +131,9 @@ void PPCMIPeephole::initialize(MachineFu
   MF = &MFParm;
   MRI = &MF->getRegInfo();
   MDT = &getAnalysis<MachineDominatorTree>();
+  MPDT = &getAnalysis<MachinePostDominatorTree>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  EntryFreq = MBFI->getEntryFreq();
   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
   LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
   LLVM_DEBUG(MF->dump());
@@ -200,6 +214,31 @@ getKnownLeadingZeroCount(MachineInstr *M
 void PPCMIPeephole::UpdateTOCSaves(
   std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
   assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
+  assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+         "TOC-save removal only supported on ELFv2");
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+
+  MachineBasicBlock *Entry = &MF->front();
+  uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+
+  // If the TOC save resides in a block that post-dominates Entry, or in a
+  // block that is hotter than Entry (keep in mind that early MachineLICM has
+  // already run, so the TOC save won't be hoisted), we can just do the save
+  // in the prologue.
+  if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
+    FI->setMustSaveTOC(true);
+
+  // If we are saving the TOC in the prologue, all the TOC saves can be removed
+  // from the code.
+  if (FI->mustSaveTOC()) {
+    for (auto &TOCSave : TOCSaves)
+      TOCSave.second = false;
+    // Add new instruction to map.
+    TOCSaves[MI] = false;
+    return;
+  }
+
   bool Keep = true;
   for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
     MachineInstr *CurrInst = It->first;
@@ -777,6 +816,10 @@ bool PPCMIPeephole::simplifyCode(void) {
 
   // Eliminate all the TOC save instructions which are redundant.
   Simplified |= eliminateRedundantTOCSaves(TOCSaves);
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  if (FI->mustSaveTOC())
+    NumTOCSavesInPrologue++;
+
   // We try to eliminate redundant compare instruction.
   Simplified |= eliminateRedundantCompare();
 
@@ -1341,6 +1384,9 @@ bool PPCMIPeephole::emitRLDICWhenLowerin
 
 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                       "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                     "PowerPC MI Peephole Optimization", false, false)
 

Modified: llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h?rev=365232&r1=365231&r2=365232&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h Fri Jul  5 11:38:09 2019
@@ -44,6 +44,12 @@ class PPCFunctionInfo : public MachineFu
   /// PEI.
   bool MustSaveLR;
 
+  /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
+  /// prologue of the function. This is typically the case when there are
+  /// indirect calls in the function and it is more profitable to save the
+  /// TOC pointer in the prologue than in the block(s) containing the call(s).
+  bool MustSaveTOC = false;
+
   /// Do we have to disable shrink-wrapping? This has to be set if we emit any
   /// instructions that clobber LR in the entry block because discovering this
   /// in PEI is too late (happens after shrink-wrapping);
@@ -151,6 +157,9 @@ public:
   void setMustSaveLR(bool U) { MustSaveLR = U; }
   bool mustSaveLR() const    { return MustSaveLR; }
 
+  void setMustSaveTOC(bool U) { MustSaveTOC = U; }
+  bool mustSaveTOC() const    { return MustSaveTOC; }
+
   /// We certainly don't want to shrink wrap functions if we've emitted a
   /// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
   /// has to go into the entry block.

Modified: llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll?rev=365232&r1=365231&r2=365232&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll Fri Jul  5 11:38:09 2019
@@ -21,9 +21,9 @@ define noalias i8* @_ZN2CC3funEv(%class.
 ; CHECK-NEXT:    std 30, -16(1)
 ; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -48(1)
-; CHECK-NEXT:    ld 12, 0(3)
-; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    std 2, 24(1)
+; CHECK-NEXT:    mr 30, 3
+; CHECK-NEXT:    ld 12, 0(3)
 ; CHECK-NEXT:    mtctr 12
 ; CHECK-NEXT:    bctrl
 ; CHECK-NEXT:    ld 2, 24(1)

Modified: llvm/trunk/test/CodeGen/PowerPC/remove-redundant-toc-saves.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/remove-redundant-toc-saves.ll?rev=365232&r1=365231&r2=365232&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/remove-redundant-toc-saves.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/remove-redundant-toc-saves.ll Fri Jul  5 11:38:09 2019
@@ -37,7 +37,6 @@ if.end:
 define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
 ; CHECK-LABEL: test3:
 ; CHECK:    std 2, 24(1)
-; CHECK:    std 2, 24(1)
 ; CHECK-NOT:    std 2, 24(1)
 entry:
   %tobool = icmp eq i32 %i, 0
@@ -87,7 +86,6 @@ define signext i32 @test5(i32 signext %i
 entry:
 ; CHECK-LABEL: test5:
 ; CHECK:    std 2, 24(1)
-; CHECK:    std 2, 24(1)
 
   %tobool = icmp eq i32 %i, 0
   br i1 %tobool, label %if.end, label %if.then

Modified: llvm/trunk/test/CodeGen/PowerPC/tocSaveInPrologue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/tocSaveInPrologue.ll?rev=365232&r1=365231&r2=365232&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/tocSaveInPrologue.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/tocSaveInPrologue.ll Fri Jul  5 11:38:09 2019
@@ -19,13 +19,12 @@ define dso_local void @test(void (i32)*
 ; CHECK-NEXT:    cmpwi cr1, r4, 11
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    extsw r28, r4
+; CHECK-NEXT:    std r2, 24(r1)
 ; CHECK-NEXT:    cmpwi r29, 1
 ; CHECK-NEXT:    cror 4*cr5+lt, lt, 4*cr1+lt
-; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_3
-; CHECK-NEXT:  # %bb.1: # %for.body.us.preheader
-; CHECK-NEXT:    std r2, 24(r1)
+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_2
 ; CHECK-NEXT:    .p2align 5
-; CHECK-NEXT:  .LBB0_2: # %for.body.us
+; CHECK-NEXT:  .LBB0_1: # %for.body.us
 ; CHECK-NEXT:  # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    mtctr r30
 ; CHECK-NEXT:    mr r3, r28
@@ -34,12 +33,11 @@ define dso_local void @test(void (i32)*
 ; CHECK-NEXT:    ld 2, 24(r1)
 ; CHECK-NEXT:    addi r29, r29, -1
 ; CHECK-NEXT:    cmplwi r29, 0
-; CHECK-NEXT:    bne cr0, .LBB0_2
-; CHECK-NEXT:  .LBB0_3: # %for.cond.cleanup
+; CHECK-NEXT:    bne cr0, .LBB0_1
+; CHECK-NEXT:  .LBB0_2: # %for.cond.cleanup
 ; CHECK-NEXT:    mtctr r30
 ; CHECK-NEXT:    mr r3, r28
 ; CHECK-NEXT:    mr r12, r30
-; CHECK-NEXT:    std r2, 24(r1)
 ; CHECK-NEXT:    bctrl
 ; CHECK-NEXT:    ld 2, 24(r1)
 ; CHECK-NEXT:    addi r1, r1, 64




More information about the llvm-commits mailing list