[llvm-commits] [llvm] r109064 - in /llvm/trunk: lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/lsr-on-unrolled-loops.ll

Evan Cheng evan.cheng at apple.com
Wed Jul 21 16:53:58 PDT 2010


Author: evancheng
Date: Wed Jul 21 18:53:58 2010
New Revision: 109064

URL: http://llvm.org/viewvc/llvm-project?rev=109064&view=rev
Log:
More register pressure aware scheduling work.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/test/CodeGen/ARM/lsr-on-unrolled-loops.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=109064&r1=109063&r2=109064&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Wed Jul 21 18:53:58 2010
@@ -280,6 +280,8 @@
   SU->setHeightToAtLeast(CurCycle);
   Sequence.push_back(SU);
 
+  AvailableQueue->ScheduledNode(SU);
+
   ReleasePredecessors(SU, CurCycle);
 
   // Release all the implicit physical register defs that are live.
@@ -298,7 +300,6 @@
   }
 
   SU->isScheduled = true;
-  AvailableQueue->ScheduledNode(SU);
 }
 
 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -322,8 +323,6 @@
   DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
   DEBUG(SU->dump(this));
 
-  AvailableQueue->UnscheduledNode(SU);
-
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     CapturePred(&*I);
@@ -353,6 +352,7 @@
   SU->isScheduled = false;
   SU->isAvailable = true;
   AvailableQueue->push(SU);
+  AvailableQueue->UnscheduledNode(SU);
 }
 
 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
@@ -1053,11 +1053,11 @@
 
     /// RegPressure - Tracking current reg pressure per register class.
     ///
-    std::vector<int> RegPressure;
+    std::vector<unsigned> RegPressure;
 
     /// RegLimit - Tracking the number of allocatable registers per register
     /// class.
-    std::vector<int> RegLimit;
+    std::vector<unsigned> RegLimit;
 
   public:
     RegReductionPriorityQueue(MachineFunction &mf,
@@ -1170,61 +1170,41 @@
       SU->NodeQueueId = 0;
     }
 
-    // EstimateSpills - Given a scheduling unit, estimate the number of spills 
-    // it would cause by scheduling it at the current cycle.
-    unsigned EstimateSpills(const SUnit *SU) const {
+    bool HighRegPressure(const SUnit *SU) const {
       if (!TLI)
-        return 0;
+        return false;
 
-      unsigned Spills = 0;
       for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
            I != E; ++I) {
         if (I->isCtrl())
           continue;
         SUnit *PredSU = I->getSUnit();
-        if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1)
-          continue;
-        const SDNode *N = PredSU->getNode();
-        if (!N->isMachineOpcode())
+        const SDNode *PN = PredSU->getNode();
+        if (!PN->isMachineOpcode()) {
+          if (PN->getOpcode() == ISD::CopyToReg) {
+            EVT VT = PN->getOperand(1).getValueType();
+            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+            unsigned Cost = TLI->getRepRegClassCostFor(VT);
+            if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
+              return true;
+          }
           continue;
-        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+        }
+        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
-          EVT VT = N->getValueType(i);
-          if (!N->hasAnyUseOfValue(i))
+          EVT VT = PN->getValueType(i);
+          if (!PN->hasAnyUseOfValue(i))
             continue;
           unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
           unsigned Cost = TLI->getRepRegClassCostFor(VT);
           // Check if this increases register pressure of the specific register
           // class to the point where it would cause spills.
-          int Excess = RegPressure[RCId] + Cost - RegLimit[RCId];
-          if (Excess > 0)
-            Spills += Excess;
-        }
-      }
-
-      if (!SU->NumSuccs || !Spills)
-        return Spills;
-      const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode())
-        return Spills;
-      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-      for (unsigned i = 0; i != NumDefs; ++i) {
-        EVT VT = N->getValueType(i);
-        if (!N->hasAnyUseOfValue(i))
-          continue;
-        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        unsigned Cost = TLI->getRepRegClassCostFor(VT);
-        if (RegPressure[RCId] > RegLimit[RCId]) {
-          int Less = RegLimit[RCId] - (RegPressure[RCId] - Cost);
-          if (Less > 0) {
-            if (Spills <= (unsigned)Less)
-              return 0;
-            Spills -= Less;
-          }
+          if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
+            return true;
         }
       }
 
-      return Spills;
+      return false;
     }
 
     void OpenPredLives(SUnit *SU) {
@@ -1232,10 +1212,7 @@
       if (!N->isMachineOpcode())
         return;
       unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::EXTRACT_SUBREG || 
-          Opc == TargetOpcode::INSERT_SUBREG ||
-          Opc == TargetOpcode::SUBREG_TO_REG ||
-          Opc == TargetOpcode::COPY_TO_REGCLASS ||
+      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
           Opc == TargetOpcode::REG_SEQUENCE ||
           Opc == TargetOpcode::IMPLICIT_DEF)
         return;
@@ -1245,10 +1222,19 @@
         if (I->isCtrl())
           continue;
         SUnit *PredSU = I->getSUnit();
-        if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1)
+        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
           continue;
         const SDNode *PN = PredSU->getNode();
-        if (!PN->isMachineOpcode())
+        if (!PN->isMachineOpcode()) {
+          if (PN->getOpcode() == ISD::CopyToReg) {
+            EVT VT = PN->getOperand(1).getValueType();
+            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          }
+          continue;
+        }
+        unsigned POpc = PN->getMachineOpcode();
+        if (POpc == TargetOpcode::IMPLICIT_DEF)
           continue;
         unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
@@ -1268,10 +1254,11 @@
         if (!N->hasAnyUseOfValue(i))
           continue;
         unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
-        if (RegPressure[RCId] < 0)
+        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
           // Register pressure tracking is imprecise. This can happen.
           RegPressure[RCId] = 0;
+        else
+          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
       }
     }
 
@@ -1280,10 +1267,7 @@
       if (!N->isMachineOpcode())
         return;
       unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::EXTRACT_SUBREG || 
-          Opc == TargetOpcode::INSERT_SUBREG ||
-          Opc == TargetOpcode::SUBREG_TO_REG ||
-          Opc == TargetOpcode::COPY_TO_REGCLASS ||
+      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
           Opc == TargetOpcode::REG_SEQUENCE ||
           Opc == TargetOpcode::IMPLICIT_DEF)
         return;
@@ -1293,10 +1277,19 @@
         if (I->isCtrl())
           continue;
         SUnit *PredSU = I->getSUnit();
-        if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1)
+        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
           continue;
         const SDNode *PN = PredSU->getNode();
-        if (!PN->isMachineOpcode())
+        if (!PN->isMachineOpcode()) {
+          if (PN->getOpcode() == ISD::CopyToReg) {
+            EVT VT = PN->getOperand(1).getValueType();
+            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          }
+          continue;
+        }
+        unsigned POpc = PN->getMachineOpcode();
+        if (POpc == TargetOpcode::IMPLICIT_DEF)
           continue;
         unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
@@ -1304,10 +1297,11 @@
           if (!PN->hasAnyUseOfValue(i))
             continue;
           unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
-          if (RegPressure[RCId] < 0)
+          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
             // Register pressure tracking is imprecise. This can happen.
             RegPressure[RCId] = 0;
+          else
+            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
         }
       }
 
@@ -1472,30 +1466,39 @@
 }
 
 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
-  bool LStall = left->SchedulingPref == Sched::Latency &&
-    SPQ->getCurCycle() < left->getHeight();
-  bool RStall = right->SchedulingPref == Sched::Latency &&
-    SPQ->getCurCycle() < right->getHeight();
-  // If scheduling one of the node will cause a pipeline stall, delay it.
-  // If scheduling either one of the node will cause a pipeline stall, sort them
-  // according to their height.
-  // If neither will cause a pipeline stall, try to reduce register pressure.
-  if (LStall) {
-    if (!RStall)
-      return true;
-    if (left->getHeight() != right->getHeight())
-      return left->getHeight() > right->getHeight();
-  } else if (RStall)
+  bool LHigh = SPQ->HighRegPressure(left);
+  bool RHigh = SPQ->HighRegPressure(right);
+  if (LHigh && !RHigh)
+    return true;
+  else if (!LHigh && RHigh)
+    return false;
+  else if (!LHigh && !RHigh) {
+    // Low register pressure situation, schedule for latency if possible.
+    bool LStall = left->SchedulingPref == Sched::Latency &&
+      SPQ->getCurCycle() < left->getHeight();
+    bool RStall = right->SchedulingPref == Sched::Latency &&
+      SPQ->getCurCycle() < right->getHeight();
+    // If scheduling one of the nodes will cause a pipeline stall, delay it.
+    // If scheduling both of the nodes will cause a pipeline stall, sort
+    // them according to their height.
+    // If neither will cause a pipeline stall, try to reduce register pressure.
+    if (LStall) {
+      if (!RStall)
+        return true;
+      if (left->getHeight() != right->getHeight())
+        return left->getHeight() > right->getHeight();
+    } else if (RStall)
       return false;
 
-  // If either node is scheduling for latency, sort them by height and latency
-  // first.
-  if (left->SchedulingPref == Sched::Latency ||
-      right->SchedulingPref == Sched::Latency) {
-    if (left->getHeight() != right->getHeight())
-      return left->getHeight() > right->getHeight();
-    if (left->Latency != right->Latency)
-      return left->Latency > right->Latency;
+    // If either node is scheduling for latency, sort them by height and latency
+    // first.
+    if (left->SchedulingPref == Sched::Latency ||
+        right->SchedulingPref == Sched::Latency) {
+      if (left->getHeight() != right->getHeight())
+        return left->getHeight() > right->getHeight();
+      if (left->Latency != right->Latency)
+        return left->Latency > right->Latency;
+    }
   }
 
   return BURRSort(left, right, SPQ);

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=109064&r1=109063&r2=109064&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Jul 21 18:53:58 2010
@@ -557,28 +557,25 @@
   switch (VT.getSimpleVT().SimpleTy) {
   default:
     return TargetLowering::findRepresentativeClass(VT);
-  // Use SPR as representative register class for all floating point
-  // and vector types.
-  case MVT::f32:
-    RRC = ARM::SPRRegisterClass;
-    break;
-  case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+  // Use DPR as the representative register class for all floating point
+  // and vector types. There are 32 SPR registers and 32 DPR registers, so
+  // the cost is 1 for both f32 and f64.
+  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
-    RRC = ARM::SPRRegisterClass;
-    Cost = 2;
+    RRC = ARM::DPRRegisterClass;
     break;
   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
   case MVT::v4f32: case MVT::v2f64:
-    RRC = ARM::SPRRegisterClass;
-    Cost = 4;
+    RRC = ARM::DPRRegisterClass;
+    Cost = 2;
     break;
   case MVT::v4i64:
-    RRC = ARM::SPRRegisterClass;
-    Cost = 8;
+    RRC = ARM::DPRRegisterClass;
+    Cost = 4;
     break;
   case MVT::v8i64:
-    RRC = ARM::SPRRegisterClass;
-    Cost = 16;
+    RRC = ARM::DPRRegisterClass;
+    Cost = 8;
     break;
   }
   return std::make_pair(RRC, Cost);

Modified: llvm/trunk/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/lsr-on-unrolled-loops.ll?rev=109064&r1=109063&r2=109064&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/lsr-on-unrolled-loops.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/lsr-on-unrolled-loops.ll Wed Jul 21 18:53:58 2010
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s0, [r9, #-128]
-; CHECK: vstr.32 s0, [r9, #-96]
-; CHECK: vstr.32 s0, [r9, #-64]
-; CHECK: vstr.32 s0, [r9, #-32]
-; CHECK: vstr.32 s0, [r9]
-; CHECK: vstr.32 s0, [r9, #32]
-; CHECK: vstr.32 s0, [r9, #64]
-; CHECK: vstr.32 s0, [r9, #96]
+; CHECK: vstr.32 s0, [r{{.*}}, #-128]
+; CHECK: vstr.32 s0, [r{{.*}}, #-96]
+; CHECK: vstr.32 s0, [r{{.*}}, #-64]
+; CHECK: vstr.32 s0, [r{{.*}}, #-32]
+; CHECK: vstr.32 s0, [r{{.*}}]
+; CHECK: vstr.32 s0, [r{{.*}}, #32]
+; CHECK: vstr.32 s0, [r{{.*}}, #64]
+; CHECK: vstr.32 s0, [r{{.*}}, #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 





More information about the llvm-commits mailing list