[llvm-commits] [llvm] r52266 - in /llvm/trunk: lib/CodeGen/VirtRegMap.cpp test/CodeGen/X86/2008-06-13-SpillerCommuting.ll

Evan Cheng evan.cheng at apple.com
Fri Jun 13 16:58:02 PDT 2008


Author: evancheng
Date: Fri Jun 13 18:58:02 2008
New Revision: 52266

URL: http://llvm.org/viewvc/llvm-project?rev=52266&view=rev
Log:
Teach the spiller to commute instructions in order to fold a reload. This hits 410 times on 444.namd and 122 times on 252.eon.

Added:
    llvm/trunk/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll
Modified:
    llvm/trunk/lib/CodeGen/VirtRegMap.cpp

Modified: llvm/trunk/lib/CodeGen/VirtRegMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegMap.cpp?rev=52266&r1=52265&r2=52266&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/VirtRegMap.cpp (original)
+++ llvm/trunk/lib/CodeGen/VirtRegMap.cpp Fri Jun 13 18:58:02 2008
@@ -36,16 +36,17 @@
 #include <algorithm>
 using namespace llvm;
 
-STATISTIC(NumSpills, "Number of register spills");
-STATISTIC(NumPSpills,"Number of physical register spills");
-STATISTIC(NumReMats, "Number of re-materialization");
-STATISTIC(NumDRM   , "Number of re-materializable defs elided");
-STATISTIC(NumStores, "Number of stores added");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumReused, "Number of values reused");
-STATISTIC(NumDSE   , "Number of dead stores elided");
-STATISTIC(NumDCE   , "Number of copies elided");
-STATISTIC(NumDSS   , "Number of dead spill slots removed");
+STATISTIC(NumSpills  , "Number of register spills");
+STATISTIC(NumPSpills ,"Number of physical register spills");
+STATISTIC(NumReMats  , "Number of re-materialization");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
 
 namespace {
   enum SpillerName { simple, local };
@@ -356,6 +357,13 @@
                            AvailableSpills &Spills, BitVector &RegKills,
                            std::vector<MachineOperand*> &KillOps,
                            VirtRegMap &VRM);
+    bool CommuteToFoldReload(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MII,
+                             unsigned VirtReg, unsigned SrcReg, int SS,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps,
+                             const TargetRegisterInfo *TRI,
+                             VirtRegMap &VRM);
     void SpillRegToStackSlot(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MII,
                              int Idx, unsigned PhysReg, int StackSlot,
@@ -874,12 +882,12 @@
 /// This enables unfolding optimization for a subsequent instruction which will
 /// also eliminate the newly introduced store instruction.
 bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator &MII,
+                                    MachineBasicBlock::iterator &MII,
                                     std::vector<MachineInstr*> &MaybeDeadStores,
-                                     AvailableSpills &Spills,
-                                     BitVector &RegKills,
-                                     std::vector<MachineOperand*> &KillOps,
-                                     VirtRegMap &VRM) {
+                                    AvailableSpills &Spills,
+                                    BitVector &RegKills,
+                                    std::vector<MachineOperand*> &KillOps,
+                                    VirtRegMap &VRM) {
   MachineFunction &MF = *MBB.getParent();
   MachineInstr &MI = *MII;
   unsigned UnfoldedOpc = 0;
@@ -971,6 +979,97 @@
   return false;
 }
 
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2<kill>
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalSpiller::CommuteToFoldReload(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator &MII,
+                                    unsigned VirtReg, unsigned SrcReg, int SS,
+                                    BitVector &RegKills,
+                                    std::vector<MachineOperand*> &KillOps,
+                                    const TargetRegisterInfo *TRI,
+                                    VirtRegMap &VRM) {
+  if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineInstr &MI = *MII;
+  MachineBasicBlock::iterator DefMII = prior(MII);
+  MachineInstr *DefMI = DefMII;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (DefMII != MBB.begin() &&
+      TID.isCommutable() &&
+      TII->CommuteChangesDestination(DefMI, NewDstIdx)) {
+    MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+    unsigned NewReg = NewDstMO.getReg();
+    if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+      return false;
+    MachineInstr *ReloadMI = prior(DefMII);
+    int FrameIdx;
+    unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+    if (DestReg != SrcReg || FrameIdx != SS)
+      return false;
+    int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+    if (UseIdx == -1)
+      return false;
+    int DefIdx = TID.getOperandConstraint(UseIdx, TOI::TIED_TO);
+    if (DefIdx == -1)
+      return false;
+    assert(DefMI->getOperand(DefIdx).isRegister() &&
+           DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+    // Now commute def instruction.
+    MachineInstr *CommutedMI = TII->commuteInstruction(DefMI);
+    if (!CommutedMI)
+      return false;
+    SmallVector<unsigned, 2> Ops;
+    Ops.push_back(NewDstIdx);
+    MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+    if (!FoldedMI) {
+      if (CommutedMI == DefMI)
+        TII->commuteInstruction(CommutedMI);
+      else
+        MBB.erase(CommutedMI);
+      return false;
+    }
+
+    VRM.addSpillSlotUse(SS, FoldedMI);
+    VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+    // Insert new def MI and spill MI.
+    const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+    TII->storeRegToStackSlot(MBB, MI, NewReg, true, SS, RC);
+    MII = prior(MII);
+    MachineInstr *StoreMI = MII;
+    VRM.addSpillSlotUse(SS, StoreMI);
+    VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+    MII = MBB.insert(MII, FoldedMI);  // Update MII to backtrack.
+
+    // Delete all 3 old instructions.
+    InvalidateKills(MI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(&MI);
+    MBB.erase(&MI);
+    if (CommutedMI != DefMI)
+      MBB.erase(CommutedMI);
+    InvalidateKills(*DefMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(DefMI);
+    MBB.erase(DefMI);
+    InvalidateKills(*ReloadMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(ReloadMI);
+    MBB.erase(ReloadMI);
+    ++NumCommutes;
+    return true;
+  }
+
+  return false;
+}
+
 /// findSuperReg - Find the SubReg's super-register of given register class
 /// where its SubIdx sub-register is SubReg.
 static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
@@ -1587,15 +1686,23 @@
           if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
             assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
                    "Src hasn't been allocated yet?");
+
+            if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
+                                    RegKills, KillOps, TRI, VRM)) {
+              NextMII = next(MII);
+              BackTracked = true;
+              goto ProcessNextInst;
+            }
+
             // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
             // this as a potentially dead store in case there is a subsequent
             // store into the stack slot without a read from it.
             MaybeDeadStores[StackSlot] = &MI;
 
             // If the stack slot value was previously available in some other
-            // register, change it now.  Otherwise, make the register available,
-            // in PhysReg.
-            Spills.addAvailable(StackSlot, &MI, SrcReg, false/*don't clobber*/);
+            // register, change it now.  Otherwise, make the register
+            // available in PhysReg.
+            Spills.addAvailable(StackSlot, &MI, SrcReg, false/*!clobber*/);
           }
         }
       }

Added: llvm/trunk/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll?rev=52266&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll Fri Jun 13 18:58:02 2008
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {spiller       - Number of instructions commuted}
+
+	%struct.CABAC_context_element = type { i8, i8 }
+	%struct.MB_Info_CABAC = type { i8, i8, [2 x i8], i8, i8, i8, i16, i16, [4 x i8], [8 x %struct.MotionVector] }
+	%struct.MotionVector = type { i16, i16 }
+	%struct.RBSP2 = type { i32, i32, i16, i8, i16, i16, <1 x i64>, i32, i32, i32*, i32*, i32*, i32*, i32, i32, i32, i32, i32, i8, i16, i8, %struct.MB_Info_CABAC*, %struct.MB_Info_CABAC*, [2 x %struct.MB_Info_CABAC], [12 x i8], [460 x %struct.CABAC_context_element], [10 x i8], [10 x i8], [10 x i16], [4 x [120 x i32]], [15 x [36 x i8]], [6 x [8 x i8]], i16* }
+	%struct.Slice_Info = type { i32, i8, %struct.seq_parameter_set_rbsp_t*, %struct.seq_parameter_set_rbsp_t, i32, i16*, i8, i8, i8, i8, i16, i32 }
+	%struct.seq_parameter_set_rbsp_t = type { i32, i32, i32 }
+ at _ZL21CABAC_CTX_state_table = external constant [64 x i16]		; <[64 x i16]*> [#uses=1]
+ at _ZL15rLPS_table_64x4 = external constant [64 x [4 x i8]]		; <[64 x [4 x i8]]*> [#uses=1]
+
+define i32 @_ZN5RBSP220residual_block_cabacEP10Slice_InfoP13MB_Info_CABACS3_hjhhbPtPs(%struct.RBSP2* %this, %struct.Slice_Info* %slice, %struct.MB_Info_CABAC* %up, %struct.MB_Info_CABAC* %left, i8 zeroext  %maxNumCoeff, i32 %blk_i, i8 zeroext  %iCbCr, i8 zeroext  %ctxBlockCat, i8 zeroext  %intra_flag, i16* %mask, i16* %res) nounwind  {
+entry:
+	%tmp43.i1590 = getelementptr %struct.RBSP2* %this, i32 0, i32 0		; <i32*> [#uses=1]
+	br label %bb803
+
+bb803:		; preds = %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581, %entry
+	%numCoeff.11749 = phi i32 [ 0, %entry ], [ %numCoeff.11749.tmp868, %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581 ]		; <i32> [#uses=1]
+	%tmp28.i1503 = load i8* null, align 1		; <i8> [#uses=1]
+	%tmp30.i1504 = getelementptr %struct.RBSP2* %this, i32 0, i32 25, i32 0, i32 0		; <i8*> [#uses=2]
+	%tmp31.i1505 = load i8* %tmp30.i1504, align 1		; <i8> [#uses=1]
+	%tmp3233.i1506 = zext i8 %tmp31.i1505 to i32		; <i32> [#uses=2]
+	%tmp35.i1507 = getelementptr [64 x i16]* @_ZL21CABAC_CTX_state_table, i32 0, i32 %tmp3233.i1506		; <i16*> [#uses=1]
+	%tmp36.i1508 = load i16* %tmp35.i1507, align 2		; <i16> [#uses=1]
+	%tmp363738.i1509 = zext i16 %tmp36.i1508 to i32		; <i32> [#uses=1]
+	%tmp51.i1514 = getelementptr [64 x [4 x i8]]* @_ZL15rLPS_table_64x4, i32 0, i32 %tmp3233.i1506, i32 0		; <i8*> [#uses=1]
+	%tmp52.i1515 = load i8* %tmp51.i1514, align 1		; <i8> [#uses=1]
+	%tmp5758.i1516 = zext i8 %tmp52.i1515 to i32		; <i32> [#uses=1]
+	%tmp60.i1517 = sub i32 0, %tmp5758.i1516		; <i32> [#uses=1]
+	store i32 %tmp60.i1517, i32* %tmp43.i1590, align 16
+	br i1 false, label %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581, label %bb.i1537
+
+bb.i1537:		; preds = %bb803
+	unreachable
+
+_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581:		; preds = %bb803
+	%tmp328329.i1580 = trunc i32 %tmp363738.i1509 to i8		; <i8> [#uses=1]
+	store i8 %tmp328329.i1580, i8* %tmp30.i1504, align 1
+	%toBool865 = icmp eq i8 %tmp28.i1503, 0		; <i1> [#uses=1]
+	%numCoeff.11749.tmp868 = select i1 %toBool865, i32 %numCoeff.11749, i32 0		; <i32> [#uses=1]
+	br label %bb803
+}





More information about the llvm-commits mailing list