[llvm-commits] [llvm] r67828 - /llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp

John Mosby ojomojo at gmail.com
Thu Mar 26 23:09:40 PDT 2009


Author: jdm
Date: Fri Mar 27 01:09:40 2009
New Revision: 67828

URL: http://llvm.org/viewvc/llvm-project?rev=67828&view=rev
Log:
Shrink wrapping in PEI: initial release. Finishing development, enable with --shrink-wrap.

Modified:
    llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp

Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=67828&r1=67827&r2=67828&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original)
+++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Fri Mar 27 01:09:40 2009
@@ -14,9 +14,34 @@
 // This pass must be run after register allocation.  After this pass is
 // executed, it is illegal to construct MO_FrameIndex operands.
 //
+// This pass implements a shrink wrapping variant of prolog/epilog insertion:
+// - Places callee saved register (CSR) spills and restores in the CFG to
+//   tightly surround uses so that execution paths that do not use CSRs do not
+//   pay the spill/restore penalty.
+//
+// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
+//   loop(nest), the spills are placed in the loop preheader, and restores are
+//   placed in the loop exit nodes (the successors of the loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses: e.g. if a restore is placed in a join
+//   block, a matching spill is added to the end of all immediate predecessor
+//   blocks that are not reached by a spill. Similarly for saves placed in
+//   branch blocks.
+//
+// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine
+// which basic blocks require callee-saved register save/restore code.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent shrink wrapping of callee-saved register save/restore
+// code into loops.
+//
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "shrink-wrap"
+
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,11 +52,24 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/ADT/STLExtras.h"
 #include <climits>
+#include <sstream>
+
 using namespace llvm;
 
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+  cl::desc("Apply shrink wrapping to callee-saved register spills/restores"));
+
 namespace {
   struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
     static char ID;
@@ -42,8 +80,13 @@
     }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreservedID(MachineLoopInfoID);
-      AU.addPreservedID(MachineDominatorsID);
+      AU.setPreservesCFG();
+      if (ShrinkWrapping) {
+        AU.addRequired<MachineLoopInfo>();
+        AU.addRequired<MachineDominatorTree>();
+      }
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<MachineDominatorTree>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -69,8 +112,16 @@
       // information and eliminates call frame pseudo instructions.
       calculateCalleeSavedRegisters(Fn);
 
+      // Determine placement of CSR spill/restore code:
+      //  - with shrink wrapping, place spills and restores to tightly
+      //    enclose regions in the Machine CFG of the function where
+      //    they are used. Without shrink wrapping
+      //  - default (no shrink wrapping), place all spills in the
+      //    entry block, all restores in return blocks.
+      placeCSRSpillsAndRestores(Fn);
+
       // Add the code to save and restore the callee saved registers
-      saveCalleeSavedRegisters(Fn);
+      insertCSRSpillsAndRestores(Fn);
 
       // Allow the target machine to make final modifications to the function
       // before the frame layout is finalized.
@@ -94,7 +145,7 @@
       delete RS;
       return true;
     }
-  
+
   private:
     RegScavenger *RS;
 
@@ -102,22 +153,702 @@
     // stack frame indexes.
     unsigned MinCSFrameIndex, MaxCSFrameIndex;
 
+    // Analysis info for spill/restore placement.
+    // "CSR": "callee saved register".
+
+    // CSRegSet contains indices into the Callee Saved Register Info
+    // vector built by calculateCalleeSavedRegisters() and accessed
+    // via MF.getFrameInfo()->getCalleeSavedInfo().
+    typedef SparseBitVector<> CSRegSet;
+
+    // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+    // saved register indices.
+    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+    // Set and maps for computing CSR spill/restore placement:
+    //  used in function (UsedCSRegs)
+    //  used in a basic block (CSRUsed)
+    //  anticipatable in a basic block (Antic{In,Out})
+    //  available in a basic block (Avail{In,Out})
+    //  to be spilled at the entry to a basic block (CSRSave)
+    //  to be restored at the end of a basic block (CSRRestore)
+
+    CSRegSet UsedCSRegs;
+    CSRegBlockMap CSRUsed;
+    CSRegBlockMap AnticIn, AnticOut;
+    CSRegBlockMap AvailIn, AvailOut;
+    CSRegBlockMap CSRSave;
+    CSRegBlockMap CSRRestore;
+
+    // Entry and return blocks of the current function.
+    MachineBasicBlock* EntryBlock;
+    SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+    // Flag to control shrink wrapping per-function:
+    // may choose to skip shrink wrapping for certain
+    // functions.
+    bool ShrinkWrapThisFunction;
+
+    bool calculateSets(MachineFunction &Fn);
+    void calculateAnticAvail(MachineFunction &Fn);
+    MachineBasicBlock* moveSpillsOutOfLoops(MachineFunction &Fn,
+                                            MachineBasicBlock* MBB);
+    void addRestoresForSBranchBlock(MachineFunction &Fn,
+                                    MachineBasicBlock* MBB);
+    void moveRestoresOutOfLoops(MachineFunction& Fn,
+                                MachineBasicBlock* MBB,
+                                std::vector<MachineBasicBlock*>& SBLKS);
+    void addSavesForRJoinBlocks(MachineFunction& Fn,
+                                std::vector<MachineBasicBlock*>& SBLKS);
+    void placeSpillsAndRestores(MachineFunction &Fn);
+    void placeCSRSpillsAndRestores(MachineFunction &Fn);
     void calculateCalleeSavedRegisters(MachineFunction &Fn);
-    void saveCalleeSavedRegisters(MachineFunction &Fn);
+    void insertCSRSpillsAndRestores(MachineFunction &Fn);
     void calculateFrameObjectOffsets(MachineFunction &Fn);
     void replaceFrameIndices(MachineFunction &Fn);
     void insertPrologEpilogCode(MachineFunction &Fn);
+
+    // Initialize all shrink wrapping data.
+    void initShrinkWrappingInfo() {
+      UsedCSRegs.clear();
+      CSRUsed.clear();
+      AnticIn.clear();
+      AnticOut.clear();
+      AvailIn.clear();
+      AvailOut.clear();
+      CSRSave.clear();
+      CSRRestore.clear();
+      EntryBlock = 0;
+      if (! ReturnBlocks.empty())
+        ReturnBlocks.clear();
+      ShrinkWrapThisFunction = ShrinkWrapping;
+    }
+
+    // Convienences for dealing with machine loops.
+    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) {
+      assert(LP && "Machine loop is NULL.");
+      MachineBasicBlock* PHDR = LP->getLoopPreheader();
+      MachineLoop* PLP = LP->getParentLoop();
+      while (PLP) {
+        PHDR = PLP->getLoopPreheader();
+        PLP = PLP->getParentLoop();
+      }
+      return PHDR;
+    }
+
+    MachineLoop* getTopLevelLoopParent(MachineLoop *LP) {
+      if (LP == 0)
+        return 0;
+      MachineLoop* PLP = LP->getParentLoop();
+      while (PLP) {
+        LP = PLP;
+        PLP = PLP->getParentLoop();
+      }
+      return LP;
+    }
+
+#ifndef NDEBUG
+    // Debugging methods.
+    static std::string getBasicBlockName(const MachineBasicBlock* MBB) {
+      std::ostringstream name;
+      if (MBB) {
+        if (MBB->getBasicBlock())
+          name << MBB->getBasicBlock()->getName();
+        else
+          name << "_MBB_" << MBB->getNumber();
+      }
+      return name.str();
+    }
+
+    static std::string stringifyCSRegSet(const CSRegSet& s,
+                                         MachineFunction &Fn) {
+      const TargetRegisterInfo* TRI = Fn.getTarget().getRegisterInfo();
+      const std::vector<CalleeSavedInfo> CSI =
+        Fn.getFrameInfo()->getCalleeSavedInfo();
+
+      std::ostringstream srep;
+      if (CSI.size() == 0) {
+        srep << "[]";
+        return srep.str();
+      }
+      srep << "[";
+      CSRegSet::iterator I = s.begin(), E = s.end();
+      if (I != E) {
+        unsigned reg = CSI[*I].getReg();
+        srep << TRI->getName(reg);
+        for (++I; I != E; ++I) {
+          reg = CSI[*I].getReg();
+          srep << ",";
+          srep << TRI->getName(reg);
+        }
+      }
+      srep << "]";
+      return srep.str();
+    }
+
+    static void dumpSet(const CSRegSet& s, MachineFunction &Fn) {
+      DOUT << stringifyCSRegSet(s, Fn) << "\n";
+    }
+#endif
+
   };
   char PEI::ID = 0;
 }
 
-
 /// createPrologEpilogCodeInserter - This function returns a pass that inserts
 /// prolog and epilog code, and eliminates abstract frame references.
 ///
 FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
 
 
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save, restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+#ifndef NDEBUG
+  DOUT << "Place CSR spills/restores for "
+       << Fn.getFunction()->getName() << "\n";
+#endif
+
+  initShrinkWrappingInfo();
+
+  if (calculateSets(Fn))
+    placeSpillsAndRestores(Fn);
+}
+
+/// calculateAnticAvail - helper for computing the data flow
+/// sets required for determining spill/restore placements.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+
+  // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+  bool changed = true;
+  unsigned iterations = 0;
+  while (changed) {
+    changed = false;
+    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+         MBBI != MBBE; ++MBBI) {
+      MachineBasicBlock* MBB = MBBI;
+
+      // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCC(MBB))
+      MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+        SE = MBB->succ_end();
+      if (SI != SE) {
+        CSRegSet prevAnticOut = AnticOut[MBB];
+        MachineBasicBlock* SUCC = *SI;
+        AnticOut[MBB] = AnticIn[SUCC];
+        for (++SI; SI != SE; ++SI) {
+          SUCC = *SI;
+          AnticOut[MBB] &= AnticIn[SUCC];
+        }
+        if (prevAnticOut != AnticOut[MBB])
+          changed = true;
+      }
+      // AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+      CSRegSet prevAnticIn = AnticIn[MBB];
+      AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+      if (prevAnticIn |= AnticIn[MBB])
+        changed = true;
+
+      // AvailIn[MBB] = INTERSECT(AvailOut[S] for S in PRED(MBB))
+      MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+        PE = MBB->pred_end();
+      if (PI != PE) {
+        CSRegSet prevAvailIn = AvailIn[MBB];
+        MachineBasicBlock* PRED = *PI;
+        AvailIn[MBB] = AvailOut[PRED];
+        for (++PI; PI != PE; ++PI) {
+          PRED = *PI;
+          AvailIn[MBB] &= AvailOut[PRED];
+        }
+        if (prevAvailIn != AvailIn[MBB])
+          changed = true;
+      }
+      // AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+      CSRegSet prevAvailOut = AvailOut[MBB];
+      AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+      if (prevAvailOut |= AvailOut[MBB])
+        changed = true;
+    }
+    ++iterations;
+  }
+
+  // EXP
+  AnticIn[EntryBlock].clear();
+  AnticOut[EntryBlock].clear();
+
+#ifndef NDEBUG
+  DOUT << "-----------------------------------------------------------\n";
+  DOUT << "iterations = " << iterations << "\n";
+  DOUT << "-----------------------------------------------------------\n";
+  DOUT << "MBB | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
+  DOUT << "-----------------------------------------------------------\n";
+  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+
+    DOUT << getBasicBlockName(MBB) << " | "
+         << stringifyCSRegSet(AnticIn[MBB], Fn)
+         << " | "
+         << stringifyCSRegSet(AnticOut[MBB], Fn)
+         << " | "
+         << stringifyCSRegSet(AvailIn[MBB], Fn)
+         << " | "
+         << stringifyCSRegSet(AvailOut[MBB], Fn)
+         << "\n";
+  }
+#endif
+}
+
+/// calculateSets - helper function for placeCSRSpillsAndRestores,
+/// collect the CSRs used in this function, develop the DF sets that
+/// describe the minimal regions in the Machine CFG around which spills,
+/// restores must be placed.
+///
+/// This function decides if shrink wrapping should actually be done:
+///   if all CSR uses are in the entry block, no shrink wrapping is possible,
+///   so ShrinkWrapping is turned off (for the current function) and the
+///   function returns false.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+
+  // Sets used to compute spill, restore placement sets.
+  const std::vector<CalleeSavedInfo> CSI =
+    Fn.getFrameInfo()->getCalleeSavedInfo();
+
+  // If no CSRs used, we are done.
+  if (CSI.empty()) {
+#ifndef NDEBUG
+    DOUT << Fn.getFunction()->getName()
+         << " uses no callee-saved registers.\n";
+#endif
+    return false;
+  }
+
+#ifndef NDEBUG
+  DOUT << "-----------------------------------------------------------\n";
+#endif
+
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  bool allCSRUsesInEntryBlock = true;
+
+  // Initialize UsedCSRegs set, CSRUsed map.
+  // At the same time, put entry block directly into
+  // CSRSave, CSRRestore sets if any CSRs are used.
+  //
+  // Quick exit option (not implemented):
+  //   Given N CSR uses in entry block,
+  //   revert to default behavior, skip the placement
+  //   step and put all saves in entry, restores in
+  //   return blocks.
+
+  // Set up entry and return blocks.
+  EntryBlock = Fn.begin();
+  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+       MBB != E; ++MBB)
+    if (!MBB->empty() && MBB->back().getDesc().isReturn())
+      ReturnBlocks.push_back(MBB);
+
+  // TODO -- check for a use of a CSR in each imm. successor of EntryBlock,
+  // do not shrink wrap this function if this is the case.
+
+  // If not shrink wrapping (this function) at this point, set bits in
+  // CSR{Save,Restore}[] and UsedCSRegs, then return.
+  if (! ShrinkWrapThisFunction) {
+    for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+      UsedCSRegs.set(inx);
+      CSRSave[EntryBlock].set(inx);
+      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+        CSRRestore[ReturnBlocks[ri]].set(inx);
+    }
+    return false;
+  }
+
+  // Walk instructions in all MBBs, create basic sets, choose
+  // whether or not to shrink wrap this function.
+  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+      for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+        unsigned Reg = CSI[inx].getReg();
+        // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+        // CSRUsed map for the current block.
+        for (unsigned opInx = 0, opEnd = I->getNumOperands();
+             opInx != opEnd; ++opInx) {
+          const MachineOperand &MO = I->getOperand(opInx);
+          if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+            continue;
+          unsigned MOReg = MO.getReg();
+          if (!MOReg)
+            continue;
+          if (MOReg == Reg ||
+              (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+               TargetRegisterInfo::isPhysicalRegister(Reg) &&
+               TRI->isSubRegister(MOReg, Reg))) {
+            // CSR Reg is defined/used in block MBB.
+            UsedCSRegs.set(inx);
+            CSRUsed[MBB].set(inx);
+            // Short-circuit analysis for entry, return blocks:
+            // if a CSR is used in the entry block, add it directly
+            // to CSRSave[EntryBlock] and to CSRRestore[R] for R
+            // in ReturnBlocks. Note CSR uses in non-entry blocks.
+            if (ShrinkWrapThisFunction) {
+              if (MBB == EntryBlock) {
+                CSRSave[MBB].set(inx);
+                for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+                  CSRRestore[ReturnBlocks[ri]].set(inx);
+              } else
+                allCSRUsesInEntryBlock = false;
+            } else {
+              // Not shrink wrapping => ensure saves/restores are correctly
+              // added for entry, return blocks.
+              CSRSave[EntryBlock].set(inx);
+              for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+                CSRRestore[ReturnBlocks[ri]].set(inx);
+            }
+          }
+        }
+      }
+    }
+#ifndef NDEBUG
+    DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+         << stringifyCSRegSet(CSRUsed[MBB], Fn) << "\n";
+#endif
+  }
+
+#ifndef NDEBUG
+  DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs, Fn) << "\n";
+#endif
+
+  // Early exit:
+  // 1. Not asked to do shrink wrapping => just "place" all spills(restores)
+  //    in the entry(return) block(s), already done above.
+  // 2. All CSR uses in entry block => same as case 1, but say we will
+  //    not shrink wrap the current function.
+  ShrinkWrapThisFunction = (ShrinkWrapping &&
+                            ShrinkWrapThisFunction &&
+                            ! allCSRUsesInEntryBlock);
+  if (! ShrinkWrapThisFunction) {
+    return false;
+  }
+
+  calculateAnticAvail(Fn);
+
+  return true;
+}
+
+/// moveSpillsOutOfLoops - helper for placeSpillsAndRestores() which
+/// relocates a spill from a subgraph in a loop to the loop preheader.
+/// Returns the MBB to which saves have been moved, or the given MBB
+/// if it is a branch point.
+///
+MachineBasicBlock* PEI::moveSpillsOutOfLoops(MachineFunction &Fn,
+                                             MachineBasicBlock* MBB) {
+  if (MBB == 0 || CSRSave[MBB].empty())
+    return 0;
+
+  // Block to which saves are moved.
+  MachineBasicBlock* DEST = 0;
+  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+
+  if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+    MachineBasicBlock* LPH = getTopLevelLoopPreheader(LP);
+    assert(LPH && "Loop has no top level preheader?");
+
+#ifndef NDEBUG
+    DOUT << "Moving saves of "
+         << stringifyCSRegSet(CSRSave[MBB], Fn)
+         << " from " << getBasicBlockName(MBB)
+         << " to " << getBasicBlockName(LPH) << "\n";
+#endif
+    // Add CSRegSet from MBB to LPH, empty out MBB's CSRegSet.
+    CSRSave[LPH] |= CSRSave[MBB];
+    // If saves moved to entry block, add restores to returns.
+    if (LPH == EntryBlock) {
+      for (unsigned i = 0, e = ReturnBlocks.size(); i != e; ++i)
+        CSRRestore[ReturnBlocks[i]] |= CSRSave[MBB];
+    } else {
+      // Remember where we moved the save so we can add
+      // restores on successor paths if necessary.
+      if (LPH->succ_size() > 1)
+        DEST = LPH;
+    }
+    CSRSave[MBB].clear();
+  } else if (MBB->succ_size() > 1)
+    DEST = MBB;
+  return DEST;
+}
+
+/// addRestoresForSBranchBlock - helper for placeSpillsAndRestores() which
+/// adds restores of CSRs saved in branch point MBBs to the front of any
+/// successor blocks connected to regions with no uses of the saved CSRs.
+///
+void PEI::addRestoresForSBranchBlock(MachineFunction &Fn,
+                                     MachineBasicBlock* MBB) {
+
+  if (MBB == 0 || CSRSave[MBB].empty() || MBB->succ_size() < 2)
+    return;
+
+  // Add restores of CSRs saved in branch point MBBs to the
+  // front of any succ blocks flowing into regions that
+  // have no uses of MBB's CSRs.
+  bool hasCSRUses = false;
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+    bool needsRestore = false;
+    if (CSRUsed[SUCC].intersects(CSRSave[MBB])) {
+      hasCSRUses = true;
+      continue;
+    }
+    needsRestore = true;
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+           BE = df_end(SUCC); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      if (CSRUsed[SBB].intersects(CSRSave[MBB])) {
+        hasCSRUses = true;
+        needsRestore = false;
+        break;
+      }
+    }
+    // Additional restores are needed for SUCC iff there is at least
+    // one CSR use reachable from the successors of MBB and there
+    // are no uses in or below SUCC.
+    if (needsRestore && hasCSRUses) {
+#ifndef NDEBUG
+      DOUT << "MBB " << getBasicBlockName(MBB)
+           << " needs a restore on path to successor "
+           << getBasicBlockName(SUCC) << "\n";
+#endif
+      // Add restores to SUCC for all CSRs saved in MBB...
+      CSRRestore[SUCC] = CSRSave[MBB];
+    }
+  }
+}
+
+/// moveRestoresOutOfLoops - helper for placeSpillsAndRestores() which
+/// relocates restores from a subgraph in a loop to the loop exit blocks.
+/// This function records the MBBs to which restores have been moved in
+/// SBLKS. If no restores are moved, SBLKS contains the input MBB if it
+/// is a join point in the Machine CFG.
+///
+void PEI::moveRestoresOutOfLoops(MachineFunction& Fn,
+                                 MachineBasicBlock* MBB,
+                                 std::vector<MachineBasicBlock*>& SBLKS) {
+
+  SBLKS.clear();
+  if (MBB == 0 || CSRRestore[MBB].empty())
+    return;
+
+  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+
+  if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+    LP = getTopLevelLoopParent(LP);
+    assert(LP && "Loop with no top level parent?");
+
+    SmallVector<MachineBasicBlock*, 4> exitBlocks;
+
+    LP->getExitBlocks(exitBlocks);
+    assert(exitBlocks.size() > 0 &&
+           "Loop has no top level exit blocks?");
+    for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+      MachineBasicBlock* EXB = exitBlocks[i];
+
+#ifndef NDEBUG
+      DOUT << "Moving restores of "
+           << stringifyCSRegSet(CSRRestore[MBB], Fn)
+           << " from " << getBasicBlockName(MBB)
+           << " to " << getBasicBlockName(EXB) << "\n";
+#endif
+
+      // Add CSRegSet from MBB to LPE, empty out MBB's CSRegSet.
+      CSRRestore[EXB] |= CSRRestore[MBB];
+      if (EXB->pred_size() > 1)
+        SBLKS.push_back(EXB);
+    }
+    CSRRestore[MBB].clear();
+  } else if (MBB->pred_size() > 1)
+    SBLKS.push_back(MBB);
+}
+
+/// addSavesForRJoinBlocks - Add saves of CSRs restored in join point MBBs
+/// to the ends of any pred blocks that flow into MBB from regions that
+/// have no uses of MBB's CSRs.
+///
+void PEI::addSavesForRJoinBlocks(MachineFunction& Fn,
+                                 std::vector<MachineBasicBlock*>& SBLKS) {
+
+  if (SBLKS.empty())
+    return;
+
+  for (unsigned i = 0, e = SBLKS.size(); i != e; ++i) {
+    MachineBasicBlock* MBB = SBLKS[i];
+    if (MBB->pred_size() > 1) {
+      bool needsSave = false;
+      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+             PE = MBB->pred_end(); PI != PE; ++PI) {
+        MachineBasicBlock* PRED = *PI;
+
+        // Walk back up in the CFG from the preds of MBB, look for
+        // a block that uses any CSR that is restored in MBB.
+        if (CSRUsed[PRED].intersects(CSRRestore[MBB]))
+          continue;
+        needsSave = true;
+        for (idf_iterator<MachineBasicBlock*> PPI = idf_begin(PRED),
+               PPE = idf_end(PRED); PPI != PPE; ++PPI) {
+          MachineBasicBlock* PBB = *PPI;
+          if (CSRUsed[PBB].intersects(CSRRestore[MBB])) {
+            needsSave = false;
+            break;
+          }
+        }
+        if (needsSave) {
+          // Add saves to PRED for all CSRs restored in MBB...
+#ifndef NDEBUG
+          DOUT << "MBB " << getBasicBlockName(MBB)
+               << " needs a save on path from predecessor "
+               << getBasicBlockName(PRED) << "\n";
+#endif
+          CSRSave[PRED] = CSRRestore[MBB];
+        }
+      }
+    }
+  }
+}
+
+/// placeSpillsAndRestores - decide which MBBs need spills, restores
+/// of CSRs.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+
+#ifndef NDEBUG
+  DOUT << "-----------------------------------------------------------\n";
+#endif
+
+  // Calculate CSR{Save,Restore} using Antic, Avail on the Machine-CFG.
+  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    // Entry block saves are recorded in UsedCSRegs pass above.
+    if (MBB != EntryBlock) {
+      // Intersect (CSRegs - AnticIn[P]) for all predecessors P of MBB
+      CSRegSet anticInPreds;
+      MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+        PE = MBB->pred_end();
+      if (PI != PE) {
+        MachineBasicBlock* PRED = *PI;
+        anticInPreds = UsedCSRegs - AnticIn[PRED];
+        for (++PI; PI != PE; ++PI) {
+          PRED = *PI;
+          // Handle self loop.
+          if (PRED != MBB)
+            anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+        }
+      }
+      // CSRSave[MBB] = (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds
+      CSRSave[MBB] = (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+      // Remove the CSRs that are saved in the entry block
+      if (! CSRSave[MBB].empty() && ! CSRSave[EntryBlock].empty())
+        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+
+      // Move saves inside loops to the preheaders of the outermost
+      // containing loops, add restores to blocks reached by saves
+      // placed at branch points where necessary.
+      if (MachineBasicBlock* DESTBB = moveSpillsOutOfLoops(Fn, MBB)) {
+        // Add restores to blocks reached by saves placed at branch
+        // points where necessary.
+        addRestoresForSBranchBlock(Fn, DESTBB);
+      }
+    }
+
+#ifndef NDEBUG
+    if (! CSRSave[MBB].empty())
+      DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+           << stringifyCSRegSet(CSRSave[MBB], Fn) << "\n";
+#endif
+
+    // Compute CSRRestore, which may already be set for return blocks.
+    if (! CSRRestore[MBB].empty() || MBB->pred_size() == 0)
+      continue;
+
+    // Intersect (CSRegs - AvailOut[S]) for all successors S of MBB
+    CSRegSet availOutSucc;
+    MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+      SE = MBB->succ_end();
+    if (SI != SE) {
+      MachineBasicBlock* SUCC = *SI;
+      availOutSucc = UsedCSRegs - AvailOut[SUCC];
+      for (++SI; SI != SE; ++SI) {
+        SUCC = *SI;
+        // Handle self loop.
+        if (SUCC != MBB)
+          availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+      }
+    } else if (! CSRUsed[MBB].empty()) {
+      // Take care of uses in return blocks (which have no successors).
+      availOutSucc = UsedCSRegs;
+    }
+    // CSRRestore[MBB] = (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc
+    CSRRestore[MBB] = (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+    // Remove the CSRs that are restored in the return blocks.
+    // Lest this be confusing, note that:
+    // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+    if (! CSRRestore[MBB].empty() && ! CSRSave[EntryBlock].empty())
+      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+
+    // Move restores inside loops to the exits of the outermost (top level)
+    // containing loops.
+    std::vector<MachineBasicBlock*> saveBlocks;
+    moveRestoresOutOfLoops(Fn, MBB, saveBlocks);
+
+    // Add saves of CSRs restored in join point MBBs to the ends
+    // of any pred blocks that flow into MBB from regions that
+    // have no uses of MBB's CSRs.
+    addSavesForRJoinBlocks(Fn, saveBlocks);
+
+#ifndef NDEBUG
+    if (! CSRRestore[MBB].empty())
+      DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+           << stringifyCSRegSet(CSRRestore[MBB], Fn) << "\n";
+#endif
+  }
+
+#ifndef NDEBUG
+  DOUT << "-----------------------------------------------------------\n";
+  DOUT << "Final SAVE, RESTORE:\n";
+  DOUT << "-----------------------------------------------------------\n";
+  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+       MBB != E; ++MBB) {
+    if (! CSRSave[MBB].empty()) {
+      DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+           << stringifyCSRegSet(CSRSave[MBB], Fn);
+      if (CSRRestore[MBB].empty())
+        DOUT << "\n";
+    }
+    if (! CSRRestore[MBB].empty()) {
+      if (! CSRSave[MBB].empty())
+        DOUT << "    ";
+      DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+           << stringifyCSRegSet(CSRRestore[MBB], Fn) << "\n";
+    }
+  }
+#endif
+}
+
 /// calculateCalleeSavedRegisters - Scan the function for modified callee saved
 /// registers.  Also calculate the MaxCallFrameSize and HasCalls variables for
 /// the function's frame information and eliminates call frame pseudo
@@ -221,7 +952,8 @@
       unsigned Align = RC->getAlignment();
       unsigned StackAlign = TFI->getStackAlignment();
       // We may not be able to sastify the desired alignment specification of
-      // the TargetRegisterClass if the stack alignment is smaller. Use the min.
+      // the TargetRegisterClass if the stack alignment is smaller.
+      // Use the min.
       Align = std::min(Align, StackAlign);
       FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
       if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
@@ -236,41 +968,104 @@
   FFI->setCalleeSavedInfo(CSI);
 }
 
-/// saveCalleeSavedRegisters -  Insert spill code for any callee saved registers
-/// that are modified in the function.
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
 ///
-void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
   // Get callee saved register information.
   MachineFrameInfo *FFI = Fn.getFrameInfo();
   const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
-  
+
   // Early exit if no callee saved registers are modified!
   if (CSI.empty())
     return;
 
   const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  MachineBasicBlock::iterator I;
+  std::vector<CalleeSavedInfo> blockCSI;
+
+#ifndef NDEBUG
+  DOUT << "Inserting spill/restore code for CSRs in function "
+       << Fn.getFunction()->getName() << "\n";
+#endif
+
+  // Insert spills.
+  for (CSRegBlockMap::iterator
+         BI = CSRSave.begin(), BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet save = BI->second;
+
+    if (save.empty())
+      continue;
+
+    if (! ShrinkWrapThisFunction) {
+      // Spill using target interface.
+      I = MBB->begin();
+      if (!TII.spillCalleeSavedRegisters(*MBB, I, CSI)) {
+        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+          // Add the callee-saved register as live-in. It's killed at the spill.
+          MBB->addLiveIn(CSI[i].getReg());
+
+          // Insert the spill to the stack frame.
+          TII.storeRegToStackSlot(*MBB, I, CSI[i].getReg(), true,
+                                  CSI[i].getFrameIdx(), CSI[i].getRegClass());
+        }
+      }
+    } else {
+#ifndef NDEBUG
+      DOUT << "CSRSave[" << getBasicBlockName(MBB) << "] = "
+           << stringifyCSRegSet(save, Fn) << "\n";
+#endif
+
+      blockCSI.clear();
+      for (CSRegSet::iterator RI = save.begin(),
+             RE = save.end(); RI != RE; ++RI) {
+        blockCSI.push_back(CSI[*RI]);
+      }
+      assert(blockCSI.size() > 0 &&
+             "Could not collect callee saved register info");
+
+      // If MBB has no uses of CSRs being saved, this means saves
+      // must be inserted at the _end_.
+      if (! MBB->empty() && ! CSRUsed[MBB].intersects(save)) {
+        I = MBB->end();
+        --I;
+        if (I->getDesc().isCall()) {
+          ++I;
+        } else {
+          MachineBasicBlock::iterator I2 = I;
+          while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+            I = I2;
+        }
+      } else {
+        I = MBB->begin();
+      }
 
-  // Now that we have a stack slot for each register to be saved, insert spill
-  // code into the entry block.
-  MachineBasicBlock *MBB = Fn.begin();
-  MachineBasicBlock::iterator I = MBB->begin();
-
-  if (!TII.spillCalleeSavedRegisters(*MBB, I, CSI)) {
-    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-      // Add the callee-saved register as live-in. It's killed at the spill.
-      MBB->addLiveIn(CSI[i].getReg());
-
-      // Insert the spill to the stack frame.
-      TII.storeRegToStackSlot(*MBB, I, CSI[i].getReg(), true,
-                                   CSI[i].getFrameIdx(), CSI[i].getRegClass());
+      // When shrink wrapping, use stack slot stores/loads.
+      for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+        // Add the callee-saved register as live-in.
+        // It's killed at the spill.
+        MBB->addLiveIn(blockCSI[i].getReg());
+
+        // Insert the spill to the stack frame.
+        TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+                                true,
+                                blockCSI[i].getFrameIdx(),
+                                blockCSI[i].getRegClass());
+      }
     }
   }
+  // Use CSRRestore to add code to restore the callee-saved registers in
+  // each block.
+  for (CSRegBlockMap::iterator
+         BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet restore = BI->second;
 
-  // Add code to restore the callee-save registers in each exiting block.
-  for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI)
-    // If last instruction is a return instruction, add an epilogue.
-    if (!FI->empty() && FI->back().getDesc().isReturn()) {
-      MBB = FI;
+    if (restore.empty())
+      continue;
+    if (! ShrinkWrapThisFunction) {
+      // Restore using target interface.
       I = MBB->end(); --I;
 
       // Skip over all terminator instructions, which are part of the return
@@ -283,18 +1078,18 @@
       MachineBasicBlock::iterator BeforeI = I;
       if (!AtStart)
         --BeforeI;
-      
-      // Restore all registers immediately before the return and any terminators
-      // that preceed it.
+
+      // Restore all registers immediately before the return and any
+      // terminators that preceed it.
       if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
           TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
-                                        CSI[i].getFrameIdx(),
-                                        CSI[i].getRegClass());
+                                   CSI[i].getFrameIdx(),
+                                   CSI[i].getRegClass());
           assert(I != MBB->begin() &&
                  "loadRegFromStackSlot didn't insert any code!");
-          // Insert in reverse order.  loadRegFromStackSlot can insert multiple
-          // instructions.
+          // Insert in reverse order.  loadRegFromStackSlot can insert
+          // multiple instructions.
           if (AtStart)
             I = MBB->begin();
           else {
@@ -303,7 +1098,79 @@
           }
         }
       }
+    } else {
+#ifndef NDEBUG
+      DOUT << "CSRRestore[" << getBasicBlockName(MBB) << "] = "
+           << stringifyCSRegSet(restore, Fn) << "\n";
+#endif
+
+      blockCSI.clear();
+      for (CSRegSet::iterator RI = restore.begin(),
+             RE = restore.end(); RI != RE; ++RI) {
+        blockCSI.push_back(CSI[*RI]);
+      }
+      assert(blockCSI.size() > 0 &&
+             "Could not find callee saved register info");
+
+      // If MBB uses no CSRs but has restores, this means
+      // it must have restores inserted at the _beginning_.
+      // N.B. -- not necessary if edge splitting done.
+      if (MBB->empty() || ! CSRUsed[MBB].intersects(restore)) {
+        I = MBB->begin();
+      } else {
+        I = MBB->end();
+        --I;
+
+        // EXP iff spill/restore implemented with push/pop:
+        // append restore to block unless it ends in a
+        // barrier terminator instruction.
+
+        // Skip over all terminator instructions, which are part of the
+        // return sequence.
+        if (I->getDesc().isCall()) {
+          ++I;
+        } else {
+          MachineBasicBlock::iterator I2 = I;
+          while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+            I = I2;
+        }
+      }
+
+      bool AtStart = I == MBB->begin();
+      MachineBasicBlock::iterator BeforeI = I;
+      if (!AtStart)
+        --BeforeI;
+
+#ifndef NDEBUG
+      if (! MBB->empty() && ! CSRUsed[MBB].intersects(restore)) {
+        MachineInstr* MI = BeforeI;
+        DOUT << "adding restore after ";
+        DEBUG(MI->dump());
+      } else {
+        DOUT << "adding restore to beginning of "
+             << getBasicBlockName(MBB) << "\n";
+      }
+#endif
+
+      // Restore all registers immediately before the return and any
+      // terminators that preceed it.
+      for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+        TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+                                 blockCSI[i].getFrameIdx(),
+                                 blockCSI[i].getRegClass());
+        assert(I != MBB->begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
+        // Insert in reverse order.  loadRegFromStackSlot can insert
+        // multiple instructions.
+        if (AtStart)
+          I = MBB->begin();
+        else {
+          I = BeforeI;
+          ++I;
+        }
+      }
     }
+  }
 }
 
 /// AdjustStackOffset - Helper function used to adjust the stack frame offset.
@@ -358,8 +1225,8 @@
 
   // If there are fixed sized objects that are preallocated in the local area,
   // non-fixed objects can't be allocated right at the start of local area.
-  // We currently don't support filling in holes in between fixed sized objects,
-  // so we adjust 'Offset' to point to the end of last fixed sized
+  // We currently don't support filling in holes in between fixed sized
+  // objects, so we adjust 'Offset' to point to the end of last fixed sized
   // preallocated object.
   for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
     int64_t FixedOff;
@@ -385,8 +1252,8 @@
       Offset += FFI->getObjectSize(i);
 
       unsigned Align = FFI->getObjectAlignment(i);
-      // If the alignment of this object is greater than that of the stack, then
-      // increase the stack alignment to match.
+      // If the alignment of this object is greater than that of the stack,
+      // then increase the stack alignment to match.
       MaxAlign = std::max(MaxAlign, Align);
       // Adjust to alignment boundary
       Offset = (Offset+Align-1)/Align*Align;
@@ -397,8 +1264,8 @@
     int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
     for (int i = MaxCSFI; i >= MinCSFI ; --i) {
       unsigned Align = FFI->getObjectAlignment(i);
-      // If the alignment of this object is greater than that of the stack, then
-      // increase the stack alignment to match.
+      // If the alignment of this object is greater than that of the stack,
+      // then increase the stack alignment to match.
       MaxAlign = std::max(MaxAlign, Align);
       // Adjust to alignment boundary
       Offset = (Offset+Align-1)/Align*Align;
@@ -453,7 +1320,7 @@
   // offsets will be relative to SP not FP; round up the stack size so this
   // works.
   if (!RegInfo->targetHandlesStackFrameRounding() &&
-      (FFI->hasCalls() || FFI->hasVarSizedObjects() || 
+      (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) &&
         FFI->getObjectIndexEnd() != 0))) {
     // If we have reserved argument space for call sites in the function
@@ -509,9 +1376,11 @@
   int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
   int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
 
-  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+  for (MachineFunction::iterator BB = Fn.begin(),
+         E = Fn.end(); BB != E; ++BB) {
     int SPAdj = 0;  // SP offset due to call frame setup / destroy.
     if (RS) RS->enterBasicBlock(BB);
+
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
       if (I->getOpcode() == TargetInstrInfo::DECLARE) {
         // Ignore it.
@@ -560,6 +1429,7 @@
           // If this instruction has a FrameIndex operand, we need to
           // use that target machine register info object to eliminate
           // it.
+
           TRI.eliminateFrameIndex(MI, SPAdj, RS);
 
           // Reset the iterator if we were at the beginning of the BB.





More information about the llvm-commits mailing list