[llvm-commits] [llvm] r129385 - in /llvm/trunk: lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp test/CodeGen/ARM/memcpy-inline.ll test/CodeGen/ARM/vfp.ll

Tue Apr 12 13:14:07 PDT 2011

Author: atrick
Date: Tue Apr 12 15:14:07 2011
New Revision: 129385

URL: http://llvm.org/viewvc/llvm-project?rev=129385&view=rev
Log:
Revert 129383. It causes some targets to hit a scheduler assert.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
    llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
    llvm/trunk/test/CodeGen/ARM/vfp.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=129385&r1=129384&r2=129385&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Tue Apr 12 15:14:07 2011
@@ -102,11 +102,11 @@
 #ifndef NDEBUG
 namespace {
   // For sched=list-ilp, Count the number of times each factor comes into play.
-  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
-         FactStatic, FactOther, NumFactors };
+  enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
+         FactOther, NumFactors };
 }
 static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"};
 static int FactorCount[NumFactors];
 #endif //!NDEBUG
 
@@ -463,13 +463,6 @@
   if (DisableSchedCycles)
     return;
 
-  // FIXME: Nodes such as CopyFromReg probably should not advance the current
-  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
-  // has predecessors the cycle will be advanced when they are scheduled.
-  // But given the crude nature of modeling latency though such nodes, we
-  // currently need to treat these nodes like real instructions.
-  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
-
   unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
 
   // Bump CurCycle to account for latency. We assume the latency of other
@@ -540,19 +533,16 @@
   }
 }
 
-static void resetVRegCycle(SUnit *SU);
-
 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
-  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
 #ifndef NDEBUG
   if (CurCycle < SU->getHeight())
-    DEBUG(dbgs() << "   Height [" << SU->getHeight()
-          << "] pipeline stall!\n");
+    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
 #endif
 
   // FIXME: Do not modify node height. It may interfere with
@@ -569,7 +559,7 @@
   AvailableQueue->ScheduledNode(SU);
 
   // If HazardRec is disabled, and each inst counts as one cycle, then
-  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+  // advance CurCycle before ReleasePredecessors to avoid useles pushed to
   // PendingQueue for schedulers that implement HasReadyFilter.
   if (!HazardRec->isEnabled() && AvgIPC < 2)
     AdvanceToCycle(CurCycle + 1);
@@ -590,25 +580,20 @@
     }
   }
 
-  resetVRegCycle(SU);
-
   SU->isScheduled = true;
 
   // Conditions under which the scheduler should eagerly advance the cycle:
   // (1) No available instructions
   // (2) All pipelines full, so available instructions must have hazards.
   //
-  // If HazardRec is disabled, the cycle was pre-advanced before calling
-  // ReleasePredecessors. In that case, IssueCount should remain 0.
+  // If HazardRec is disabled, the cycle was advanced earlier.
   //
   // Check AvailableQueue after ReleasePredecessors in case of zero latency.
-  if (HazardRec->isEnabled() || AvgIPC > 1) {
-    if (SU->getNode() && SU->getNode()->isMachineOpcode())
-      ++IssueCount;
-    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
-        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
-      AdvanceToCycle(CurCycle + 1);
-  }
+  ++IssueCount;
+  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+      || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
+      || AvailableQueue->empty())
+    AdvanceToCycle(CurCycle + 1);
 }
 
 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -1235,7 +1220,7 @@
   // priority. If it is not ready put it back.  Schedule the node.
   Sequence.reserve(SUnits.size());
   while (!AvailableQueue->empty()) {
-    DEBUG(dbgs() << "Examining Available:\n";
+    DEBUG(dbgs() << "\n*** Examining Available\n";
           AvailableQueue->dump(this));
 
     // Pick the best node to schedule taking all constraints into
@@ -1676,6 +1661,17 @@
     CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
 }
 
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+  SUnits = &sunits;
+  // Add pseudo dependency edges for two-address nodes.
+  AddPseudoTwoAddrDeps();
+  // Reroute edges to nodes with multiple uses.
+  if (!TracksRegPressure)
+    PrescheduleNodesWithMultipleUses();
+  // Calculate node priorities.
+  CalculateSethiUllmanNumbers();
+}
+
 void RegReductionPQBase::addNode(const SUnit *SU) {
   unsigned SUSize = SethiUllmanNumbers.size();
   if (SUnits->size() > SUSize)
@@ -2012,29 +2008,7 @@
   return Scratches;
 }
 
-/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
-/// CopyFromReg from a virtual register.
-static bool hasOnlyLiveInOpers(const SUnit *SU) {
-  bool RetVal = false;
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    if (I->isCtrl()) continue;
-    const SUnit *PredSU = I->getSUnit();
-    if (PredSU->getNode() &&
-        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
-      unsigned Reg =
-        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
-      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-        RetVal = true;
-        continue;
-      }
-    }
-    return false;
-  }
-  return RetVal;
-}
-
-/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is a
 /// CopyToReg to a virtual register. This SU def is probably a liveout and
 /// it has no other use. It should be scheduled closer to the terminator.
 static bool hasOnlyLiveOutUses(const SUnit *SU) {
@@ -2056,71 +2030,62 @@
   return RetVal;
 }
 
-// Set isVRegCycle for a node with only live in opers and live out uses. Also
-// set isVRegCycle for its CopyFromReg operands.
-//
-// This is only relevant for single-block loops, in which case the VRegCycle
-// node is likely an induction variable in which the operand and target virtual
-// registers should be coalesced (e.g. pre/post increment values). Setting the
-// isVRegCycle flag helps the scheduler prioritize other uses of the same
-// CopyFromReg so that this node becomes the virtual register "kill". This
-// avoids interference between the values live in and out of the block and
-// eliminates a copy inside the loop.
-static void initVRegCycle(SUnit *SU) {
-  if (DisableSchedVRegCycle)
-    return;
-
-  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
-    return;
-
-  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
-
-  SU->isVRegCycle = true;
-
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+  SmallSet<const SUnit*, 4> Preds;
+  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
        I != E; ++I) {
-    if (I->isCtrl()) continue;
-    I->getSUnit()->isVRegCycle = true;
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Preds.insert(I->getSUnit());
   }
-}
-
-// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
-// CopyFromReg operands. We should no longer penalize other uses of this VReg.
-static void resetVRegCycle(SUnit *SU) {
-  if (!SU->isVRegCycle)
-    return;
-
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
        I != E; ++I) {
     if (I->isCtrl()) continue;  // ignore chain preds
-    SUnit *PredSU = I->getSUnit();
-    if (PredSU->isVRegCycle) {
-      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
-             "VRegCycle def must be CopyFromReg");
-      I->getSUnit()->isVRegCycle = 0;
-    }
+    if (Preds.count(I->getSUnit()))
+      return true;
   }
+  return false;
 }
 
-// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
-// means a node that defines the VRegCycle has not been scheduled yet.
-static bool hasVRegCycleUse(const SUnit *SU) {
-  // If this SU also defines the VReg, don't hoist it as a "use".
-  if (SU->isVRegCycle)
-    return false;
-
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
-       I != E; ++I) {
+// Return true if the virtual register defined by VRCycleSU may interfere with
+// VRUseSU.
+//
+// Note: We may consider two SU's that use the same value live into a loop as
+// interfering even though the value is not an induction variable. This is an
+// unfortunate consequence of scheduling on the selection DAG.
+static bool checkVRegCycleInterference(const SUnit *VRCycleSU,
+                                       const SUnit *VRUseSU) {
+  for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(),
+         E = VRCycleSU->Preds.end(); I != E; ++I) {
     if (I->isCtrl()) continue;  // ignore chain preds
-    if (I->getSUnit()->isVRegCycle &&
-        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
-      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
-      return true;
+    SDNode *InNode = I->getSUnit()->getNode();
+    if (!InNode || InNode->getOpcode() != ISD::CopyFromReg)
+      continue;
+    for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(),
+           EE = VRUseSU->Preds.end(); II != EE; ++II) {
+      if (II->getSUnit() == I->getSUnit())
+        return true;
     }
   }
   return false;
 }
 
+// Compare the VRegCycle properties of the nodes.
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if priority is equivalent.
+static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) {
+  if (left->isVRegCycle && !right->isVRegCycle) {
+    if (checkVRegCycleInterference(left, right))
+      return -1;
+  }
+  else if (!left->isVRegCycle && right->isVRegCycle) {
+    if (checkVRegCycleInterference(right, left))
+      return 1;
+  }
+  return 0;
+}
+
 // Check for either a dependence (latency) or resource (hazard) stall.
 //
 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
@@ -2136,12 +2101,23 @@
 // Return 0 if latency-based priority is equivalent.
 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                             RegReductionPQBase *SPQ) {
-  // Scheduling an instruction that uses a VReg whose postincrement has not yet
-  // been scheduled will induce a copy. Model this as an extra cycle of latency.
-  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
-  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
-  int LHeight = (int)left->getHeight() + LPenalty;
-  int RHeight = (int)right->getHeight() + RPenalty;
+  // If the two nodes share an operand and one of them has a single
+  // use that is a live out copy, favor the one that is live out. Otherwise
+  // it will be difficult to eliminate the copy if the instruction is a
+  // loop induction variable update. e.g.
+  // BB:
+  // sub r1, r3, #1
+  // str r0, [r2, r3]
+  // mov r3, r1
+  // cmp
+  // bne BB
+  bool SharePred = UnitsSharePred(left, right);
+  // FIXME: Only adjust if BB is a loop back edge.
+  // FIXME: What's the cost of a copy?
+  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+  int LHeight = (int)left->getHeight() - LBonus;
+  int RHeight = (int)right->getHeight() - RBonus;
 
   bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
     BUHasStall(left, LHeight, SPQ);
@@ -2152,47 +2128,36 @@
   // If scheduling either one of the node will cause a pipeline stall, sort
   // them according to their height.
   if (LStall) {
-    if (!RStall) {
-      DEBUG(++FactorCount[FactStall]);
+    if (!RStall)
       return 1;
-    }
-    if (LHeight != RHeight) {
-      DEBUG(++FactorCount[FactStall]);
+    if (LHeight != RHeight)
       return LHeight > RHeight ? 1 : -1;
-    }
-  } else if (RStall) {
-    DEBUG(++FactorCount[FactStall]);
+  } else if (RStall)
     return -1;
-  }
 
   // If either node is scheduling for latency, sort them by height/depth
   // and latency.
   if (!checkPref || (left->SchedulingPref == Sched::Latency ||
                      right->SchedulingPref == Sched::Latency)) {
     if (DisableSchedCycles) {
-      if (LHeight != RHeight) {
-        DEBUG(++FactorCount[FactHeight]);
+      if (LHeight != RHeight)
         return LHeight > RHeight ? 1 : -1;
-      }
     }
     else {
       // If neither instruction stalls (!LStall && !RStall) then
       // its height is already covered so only its depth matters. We also reach
       // this if both stall but have the same height.
-      int LDepth = left->getDepth() - LPenalty;
-      int RDepth = right->getDepth() - RPenalty;
+      unsigned LDepth = left->getDepth();
+      unsigned RDepth = right->getDepth();
       if (LDepth != RDepth) {
-        DEBUG(++FactorCount[FactDepth]);
         DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
               << ") depth " << LDepth << " vs SU (" << right->NodeNum
               << ") depth " << RDepth << "\n");
         return LDepth < RDepth ? 1 : -1;
       }
     }
-    if (left->Latency != right->Latency) {
-      DEBUG(++FactorCount[FactOther]);
+    if (left->Latency != right->Latency)
       return left->Latency > right->Latency ? 1 : -1;
-    }
   }
   return 0;
 }
@@ -2204,19 +2169,7 @@
     DEBUG(++FactorCount[FactStatic]);
     return LPriority > RPriority;
   }
-  else if(LPriority == 0) {
-    // Schedule zero-latency TokenFactor below any other special
-    // nodes. The alternative may be to avoid artificially boosting the
-    // TokenFactor's height when it is scheduled, but we currently rely on an
-    // instruction's final height to equal the cycle in which it is scheduled,
-    // so heights are monotonically increasing.
-    unsigned LOpc = left->getNode() ? left->getNode()->getOpcode() : 0;
-    unsigned ROpc = right->getNode() ? right->getNode()->getOpcode() : 0;
-    if (LOpc == ISD::TokenFactor)
-      return false;
-    if (ROpc == ISD::TokenFactor)
-      return true;
-  }
+  DEBUG(++FactorCount[FactOther]);
 
   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
   // e.g.
@@ -2237,18 +2190,14 @@
   // This creates more short live intervals.
   unsigned LDist = closestSucc(left);
   unsigned RDist = closestSucc(right);
-  if (LDist != RDist) {
-    DEBUG(++FactorCount[FactOther]);
+  if (LDist != RDist)
     return LDist < RDist;
-  }
 
   // How many registers becomes live when the node is scheduled.
   unsigned LScratch = calcMaxScratches(left);
   unsigned RScratch = calcMaxScratches(right);
-  if (LScratch != RScratch) {
-    DEBUG(++FactorCount[FactOther]);
+  if (LScratch != RScratch)
     return LScratch > RScratch;
-  }
 
   if (!DisableSchedCycles) {
     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
@@ -2256,20 +2205,15 @@
       return result > 0;
   }
   else {
-    if (left->getHeight() != right->getHeight()) {
-      DEBUG(++FactorCount[FactHeight]);
+    if (left->getHeight() != right->getHeight())
       return left->getHeight() > right->getHeight();
-    }
 
-    if (left->getDepth() != right->getDepth()) {
-      DEBUG(++FactorCount[FactDepth]);
+    if (left->getDepth() != right->getDepth())
       return left->getDepth() < right->getDepth();
-    }
   }
 
   assert(left->NodeQueueId && right->NodeQueueId &&
          "NodeQueueId cannot be zero");
-  DEBUG(++FactorCount[FactOther]);
   return (left->NodeQueueId > right->NodeQueueId);
 }
 
@@ -2320,22 +2264,24 @@
   // Avoid causing spills. If register pressure is high, schedule for
   // register pressure reduction.
   if (LHigh && !RHigh) {
-    DEBUG(++FactorCount[FactPressureDiff]);
     DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
           << right->NodeNum << ")\n");
     return true;
   }
   else if (!LHigh && RHigh) {
-    DEBUG(++FactorCount[FactPressureDiff]);
     DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
           << left->NodeNum << ")\n");
     return false;
   }
-  if (!LHigh && !RHigh) {
-    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
-    if (result != 0)
-      return result > 0;
+  int result = 0;
+  if (!DisableSchedVRegCycle) {
+    result = BUCompareVRegCycle(left, right);
   }
+  if (result == 0 && !LHigh && !RHigh) {
+    result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+  }
+  if (result != 0)
+    return result > 0;
   return BURRSort(left, right, SPQ);
 }
 
@@ -2401,6 +2347,12 @@
     if (RReduce && !LReduce) return true;
   }
 
+  if (!DisableSchedVRegCycle) {
+    int result = BUCompareVRegCycle(left, right);
+    if (result != 0)
+      return result > 0;
+  }
+
   if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
     DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
           << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
@@ -2439,24 +2391,6 @@
   return BURRSort(left, right, SPQ);
 }
 
-void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
-  SUnits = &sunits;
-  // Add pseudo dependency edges for two-address nodes.
-  AddPseudoTwoAddrDeps();
-  // Reroute edges to nodes with multiple uses.
-  if (!TracksRegPressure)
-    PrescheduleNodesWithMultipleUses();
-  // Calculate node priorities.
-  CalculateSethiUllmanNumbers();
-
-  // For single block loops, mark nodes that look like canonical IV increments.
-  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
-    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
-      initVRegCycle(&sunits[i]);
-    }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 //                    Preschedule for Register Pressure
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=129385&r1=129384&r2=129385&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Tue Apr 12 15:14:07 2011
@@ -342,6 +342,10 @@
     assert(N->getNodeId() == -1 && "Node already inserted!");
     N->setNodeId(NodeSUnit->NodeNum);
 
+    // Set isVRegCycle if the node operands are live into and value is live out
+    // of a single block loop.
+    InitVRegCycleFlag(NodeSUnit);
+
     // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
     InitNumRegDefsLeft(NodeSUnit);
 
@@ -412,13 +416,7 @@
           PhysReg = 0;
 
         // If this is a ctrl dep, latency is 1.
-        // Special-case TokenFactor chains as zero-latency.
-        unsigned OpLatency = 1;
-        if (!isChain && OpSU->Latency > 0)
-          OpLatency = OpSU->Latency;
-        else if(isChain && OpN->getOpcode() == ISD::TokenFactor)
-          OpLatency = 0;
-
+        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
         const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                OpLatency, PhysReg);
         if (!isChain && !UnitLatencies) {
@@ -514,6 +512,47 @@
   }
 }
 
+// Set isVRegCycle if this node's single use is CopyToReg and its only active
+// data operands are CopyFromReg.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) {
+  if (!BB->isSuccessor(BB))
+    return;
+
+  SDNode *N = SU->getNode();
+  if (N->getGluedNode())
+    return;
+
+  if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg)
+    return;
+
+  bool FoundLiveIn = false;
+  for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) {
+    EVT OpVT = OI->getValueType();
+    assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+
+    if (OpVT == MVT::Other)
+      continue; // ignore chain operands
+
+    if (isPassiveNode(OI->getNode()))
+      continue; // ignore constants and such
+
+    if (OI->getNode()->getOpcode() != ISD::CopyFromReg)
+      return;
+
+    FoundLiveIn = true;
+  }
+  if (FoundLiveIn)
+    SU->isVRegCycle = true;
+}
+
 void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
   assert(SU->NumRegDefsLeft == 0 && "expect a new node");
   for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {

Modified: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll?rev=129385&r1=129384&r2=129385&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll Tue Apr 12 15:14:07 2011
@@ -1,8 +1,10 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
 
 ; The ARM magic hinting works best with linear scan.
-; CHECK: ldrd
-; CHECK: strd
+; CHECK: ldmia
+; CHECK: stmia
+; CHECK: ldrh
 ; CHECK: ldrb
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }

Modified: llvm/trunk/test/CodeGen/ARM/vfp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vfp.ll?rev=129385&r1=129384&r2=129385&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vfp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vfp.ll Tue Apr 12 15:14:07 2011
@@ -40,8 +40,8 @@
 define void @test_ext_round(float* %P, double* %D) {
 ;CHECK: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
-;CHECK: vcvt.f64.f32
 ;CHECK: vcvt.f32.f64
+;CHECK: vcvt.f64.f32
 	%b = fpext float %a to double		; <double> [#uses=1]
 	%A = load double* %D		; <double> [#uses=1]
 	%B = fptrunc double %A to float		; <float> [#uses=1]