[llvm-commits] [vector_llvm] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp LegalizeDAG.cpp ScheduleDAG.cpp SelectionDAG.cpp SelectionDAGISel.cpp

Robert Bocchino bocchino at cs.uiuc.edu
Wed Nov 16 10:32:27 PST 2005



Changes in directory llvm/lib/CodeGen/SelectionDAG:

DAGCombiner.cpp updated: 1.47 -> 1.47.2.1
LegalizeDAG.cpp updated: 1.201 -> 1.201.2.1
ScheduleDAG.cpp updated: 1.37 -> 1.37.2.1
SelectionDAG.cpp updated: 1.206 -> 1.206.2.1
SelectionDAGISel.cpp updated: 1.88.2.1 -> 1.88.2.2
---
Log message:

Merged mainline into Vector LLVM branch


---
Diffs of the changes:  (+1099 -552)

 DAGCombiner.cpp      |  357 ++++++++++++++++++++++++++++++++-
 LegalizeDAG.cpp      |   85 +++++--
 ScheduleDAG.cpp      |  544 ++++++++++++++++++++++++++++++---------------------
 SelectionDAG.cpp     |  210 ++++++++++++-------
 SelectionDAGISel.cpp |  455 +++++++++++++++++++++---------------------
 5 files changed, 1099 insertions(+), 552 deletions(-)


Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.47 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.47.2.1
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.47	Tue Oct 18 01:04:22 2005
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp	Wed Nov 16 12:32:15 2005
@@ -180,6 +180,9 @@
                                SDOperand N3, ISD::CondCode CC);
     SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
                             ISD::CondCode Cond, bool foldBooleans = true);
+    
+    SDOperand BuildSDIV(SDNode *N);
+    SDOperand BuildUDIV(SDNode *N);    
 public:
     DAGCombiner(SelectionDAG &D)
       : DAG(D), TLI(D.getTargetLoweringInfo()), AfterLegalize(false) {}
@@ -189,6 +192,178 @@
   };
 }
 
+struct ms {  // result of magic32/magic64 (signed divide-by-constant)
+  int64_t m;  // magic multiplier; BuildSDIV feeds it to MULHS
+  int64_t s;  // shift amount; BuildSDIV applies it with SRA when > 0
+};
+
+struct mu {  // result of magicu32/magicu64 (unsigned divide-by-constant)
+  uint64_t m; // magic multiplier; BuildUDIV feeds it to MULHU
+  int64_t a;  // "add" indicator: nonzero selects BuildUDIV's sub/srl/add path
+  int64_t s;  // shift amount; BuildUDIV applies it with SRL
+};
+
+/// magic32 - calculate the magic numbers required to codegen a 32-bit integer
+/// sdiv as a sequence of multiply and shifts.  Requires that the divisor not
+/// be 0, 1, or -1.
+static ms magic32(int32_t d) {
+  int32_t p;
+  uint32_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint32_t two31 = 0x80000000U;
+  struct ms mag;
+  
+  ad = d >= 0 ? (uint32_t)d : 0U - (uint32_t)d; // |d|; no overflow at INT32_MIN
+  t = two31 + ((uint32_t)d >> 31);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 31;               // initialize p
+  q1 = two31/anc;       // initialize q1 = 2^p/abs(nc)
+  r1 = two31 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
+  q2 = two31/ad;        // initialize q2 = 2^p/abs(d)
+  r2 = two31 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2^p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = (int32_t)(q2 + 1); // make sure to sign extend
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 32;            // resulting shift
+  return mag;
+}
+
+/// magicu32 - calculate the magic numbers required to codegen a 32-bit udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu32(uint32_t d) {
+  int32_t p;
+  uint32_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = - 1 - (-d)%d;        // nc = 2^32 - 1 - rem(2^32 - d, d), unsigned math
+  p = 31;                   // initialize p
+  q1 = 0x80000000/nc;       // initialize q1 = 2^p/nc
+  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2^p,nc)
+  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2^p-1)/d
+  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2^p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1 ) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFF) magu.a = 1; // 2*q2 + 1 would not fit in 32 bits
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x80000000) magu.a = 1; // 2*q2 would not fit in 32 bits
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 32;  // resulting shift
+  return magu;
+}
+
+/// magic64 - calculate the magic numbers required to codegen a 64-bit integer
+/// sdiv as a sequence of multiply and shifts.  Requires that the divisor not
+/// be 0, 1, or -1.
+static ms magic64(int64_t d) {
+  int64_t p;
+  uint64_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint64_t two63 = 9223372036854775808ULL; // 2^63
+  struct ms mag;
+  
+  ad = d >= 0 ? (uint64_t)d : 0ULL - (uint64_t)d; // |d|; safe at INT64_MIN
+  t = two63 + ((uint64_t)d >> 63);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 63;               // initialize p
+  q1 = two63/anc;       // initialize q1 = 2^p/abs(nc)
+  r1 = two63 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
+  q2 = two63/ad;        // initialize q2 = 2^p/abs(d)
+  r2 = two63 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2^p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = q2 + 1;
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 64;            // resulting shift
+  return mag;
+}
+
+/// magicu64 - calculate the magic numbers required to codegen a 64-bit udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu64(uint64_t d)
+{
+  int64_t p;
+  uint64_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = - 1 - (-d)%d;        // nc = 2^64 - 1 - rem(2^64 - d, d), unsigned math
+  p = 63;                   // initialize p
+  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2^p/nc
+  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2^p,nc)
+  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2^p-1)/d
+  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2^p-1),d)
+  do {
+    p = p + 1;       // p starts at 64 here and may grow to 128 (2 * 64 bits)
+    if (r1 >= nc - r1 ) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x8000000000000000ull) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 64;  // resulting shift
+  return magu;
+}
+
 /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero.  We use
 /// this predicate to simplify operations downstream.  Op and Mask are known to
 /// be the same type.
@@ -209,7 +384,7 @@
     return (Mask & ((1ULL << SrcBits)-1)) == 0; // Returning only the zext bits.
   case ISD::ZERO_EXTEND:
     SrcBits = MVT::getSizeInBits(Op.getOperand(0).getValueType());
-    return MaskedValueIsZero(Op.getOperand(0),Mask & ((1ULL << SrcBits)-1),TLI);
+    return MaskedValueIsZero(Op.getOperand(0),Mask & (~0ULL >> (64-SrcBits)),TLI);
   case ISD::AssertZext:
     SrcBits = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
     return (Mask & ((1ULL << SrcBits)-1)) == 0; // Returning only the zext bits.
@@ -339,7 +514,9 @@
   AfterLegalize = RunningAfterLegalize;
 
   // Add all the dag nodes to the worklist.
-  WorkList.insert(WorkList.end(), DAG.allnodes_begin(), DAG.allnodes_end());
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I)
+    WorkList.push_back(I);
   
   // Create a dummy node (which is not added to allnodes), that adds a reference
   // to the root node, preventing it from being deleted, and tracking any
@@ -560,8 +737,7 @@
   
   // fold (mul c1, c2) -> c1*c2
   if (N0C && N1C)
-    return DAG.getConstant(N0C->getValue() * N1C->getValue(),
-                           N->getValueType(0));
+    return DAG.getConstant(N0C->getValue() * N1C->getValue(), VT);
   // canonicalize constant to RHS
   if (N0C && !N1C)
     return DAG.getNode(ISD::MUL, VT, N1, N0);
@@ -570,13 +746,23 @@
     return N1;
   // fold (mul x, -1) -> 0-x
   if (N1C && N1C->isAllOnesValue())
-    return DAG.getNode(ISD::SUB, N->getValueType(0), 
-                       DAG.getConstant(0, N->getValueType(0)), N0);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
   // fold (mul x, (1 << c)) -> x << c
   if (N1C && isPowerOf2_64(N1C->getValue()))
-    return DAG.getNode(ISD::SHL, N->getValueType(0), N0,
+    return DAG.getNode(ISD::SHL, VT, N0,
                        DAG.getConstant(Log2_64(N1C->getValue()),
                                        TLI.getShiftAmountTy()));
+  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+  if (N1C && isPowerOf2_64(-N1C->getSignExtended())) {
+    // FIXME: If the input is something that is easily negated (e.g. a 
+    // single-use add), we should put the negate there.
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT),
+                       DAG.getNode(ISD::SHL, VT, N0,
+                            DAG.getConstant(Log2_64(-N1C->getSignExtended()),
+                                            TLI.getShiftAmountTy())));
+  }
+  
+  
   // fold (mul (mul x, c1), c2) -> (mul x, c1*c2)
   if (N1C && N0.getOpcode() == ISD::MUL) {
     ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
@@ -602,18 +788,58 @@
   if (N0C && N1C && !N1C->isNullValue())
     return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(),
                            N->getValueType(0));
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSignExtended() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
   // If we know the sign bits of both operands are zero, strength reduce to a
   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
   uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
   if (MaskedValueIsZero(N1, SignBit, TLI) &&
       MaskedValueIsZero(N0, SignBit, TLI))
     return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+  // fold (sdiv X, pow2) -> (sra (add X, (srl X, bits(X)-1)), log2(pow2))
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && 
+      (isPowerOf2_64(N1C->getSignExtended()) || 
+       isPowerOf2_64(-N1C->getSignExtended()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDOperand();
+    int64_t pow2 = N1C->getSignExtended();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                TLI.getShiftAmountTy()));
+    WorkList.push_back(SRL.Val);
+    SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL);
+    WorkList.push_back(SGN.Val);
+    SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN, 
+                                DAG.getConstant(Log2_64(abs2),
+                                                TLI.getShiftAmountTy()));
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+    WorkList.push_back(SRA.Val);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+  }
+  // if integer divide is expensive and we satisfy the requirements, emit an
+  // alternate sequence.
+  if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && 
+      !TLI.isIntDivCheap()) {
+    SDOperand Op = BuildSDIV(N);
+    if (Op.Val) return Op;
+  }
   return SDOperand();
 }
 
 SDOperand DAGCombiner::visitUDIV(SDNode *N) {
   SDOperand N0 = N->getOperand(0);
   SDOperand N1 = N->getOperand(1);
+  MVT::ValueType VT = N->getValueType(0);
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
   
@@ -626,6 +852,12 @@
     return DAG.getNode(ISD::SRL, N->getValueType(0), N0,
                        DAG.getConstant(Log2_64(N1C->getValue()),
                                        TLI.getShiftAmountTy()));
+  // fold (udiv x, c) -> alternate
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap()) {
+    SDOperand Op = BuildUDIV(N);
+    if (Op.Val) return Op;
+  }
+      
   return SDOperand();
 }
 
@@ -733,14 +965,14 @@
                          DAG.getConstant(N1C->getValue()&N01C->getValue(), VT));
   }
   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
-  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+  if (N1C && N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
     unsigned ExtendBits =
-    MVT::getSizeInBits(cast<VTSDNode>(N0.getOperand(1))->getVT());
-    if ((N1C->getValue() & (~0ULL << ExtendBits)) == 0)
+        MVT::getSizeInBits(cast<VTSDNode>(N0.getOperand(1))->getVT());
+    if (ExtendBits == 64 || ((N1C->getValue() & (~0ULL << ExtendBits)) == 0))
       return DAG.getNode(ISD::AND, VT, N0.getOperand(0), N1);
   }
   // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
-  if (N0.getOpcode() == ISD::OR && N1C)
+  if (N1C && N0.getOpcode() == ISD::OR)
     if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
       if ((ORI->getValue() & N1C->getValue()) == N1C->getValue())
         return N1;
@@ -801,7 +1033,7 @@
     return DAG.getNode(N0.getOpcode(), VT, ANDNode, N0.getOperand(1));
   }
   // fold (and (sra)) -> (and (srl)) when possible.
-  if (N0.getOpcode() == ISD::SRA && N0.Val->hasOneUse())
+  if (N0.getOpcode() == ISD::SRA && N0.Val->hasOneUse()) {
     if (ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
       // If the RHS of the AND has zeros where the sign bits of the SRA will
       // land, turn the SRA into an SRL.
@@ -813,7 +1045,7 @@
         return SDOperand();
       }
     }
-      
+  }
   // fold (zext_inreg (extload x)) -> (zextload x)
   if (N0.getOpcode() == ISD::EXTLOAD) {
     MVT::ValueType EVT = cast<VTSDNode>(N0.getOperand(3))->getVT();
@@ -883,7 +1115,16 @@
     if (N01C)
       return DAG.getNode(ISD::OR, VT, N0.getOperand(0),
                          DAG.getConstant(N1C->getValue()|N01C->getValue(), VT));
+  } else if (N1C && N0.getOpcode() == ISD::AND && N0.Val->hasOneUse() &&
+             isa<ConstantSDNode>(N0.getOperand(1))) {
+    // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+    return DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::OR, VT, N0.getOperand(0),
+                                                 N1),
+                       DAG.getConstant(N1C->getValue() | C1->getValue(), VT));
   }
+  
+  
   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -1747,7 +1988,10 @@
  
   // If this is a store that kills a previous store, remove the previous store.
   if (Chain.getOpcode() == ISD::STORE && Chain.getOperand(2) == Ptr &&
-      Chain.Val->hasOneUse() /* Avoid introducing DAG cycles */) {
+      Chain.Val->hasOneUse() /* Avoid introducing DAG cycles */ &&
+      // Make sure that these stores are the same value type:
+      // FIXME: we really care that the second store is >= size of the first.
+      Value.getValueType() == Chain.getOperand(1).getValueType()) {
     // Create a new store of Value that replaces both stores.
     SDNode *PrevStore = Chain.Val;
     if (PrevStore->getOperand(1) == Value) // Same value multiply stored.
@@ -2152,6 +2396,9 @@
       // Canonicalize setgt X, Min --> setne X, Min
       if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
         return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+      // Canonicalize setlt X, Max --> setne X, Max
+      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
 
       // If we have setult X, 1, turn it into seteq X, 0
       if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
@@ -2249,7 +2496,7 @@
           if (N0.getOperand(0) == N1.getOperand(1))
             return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond);
           if (N0.getOperand(1) == N1.getOperand(0))
-            return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond);
+            return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond);
         }
       }
 
@@ -2356,6 +2603,86 @@
   return SDOperand();
 }
 
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression that computes the same value by multiplying by a magic
+/// number.  Returns an empty SDOperand if VT or MULHS is unsupported.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // Check to see if we can do this.
+  if (!TLI.isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+    return SDOperand();       // BuildSDIV only operates on i32 or i64
+  if (!TLI.isOperationLegal(ISD::MULHS, VT))
+    return SDOperand();       // Make sure the target supports MULHS.
+  
+  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
+  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  // If d > 0 and m < 0, add the numerator
+  if (d > 0 && magics.m < 0) { 
+    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
+    WorkList.push_back(Q.Val);
+  }
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d < 0 && magics.m > 0) {
+    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
+    WorkList.push_back(Q.Val);
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, VT, Q, 
+                    DAG.getConstant(magics.s, TLI.getShiftAmountTy()));
+    WorkList.push_back(Q.Val);
+  }
+  // Extract the sign bit of Q (logical shift by width-1) and add it to Q
+  SDOperand T =
+    DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                 TLI.getShiftAmountTy()));
+  WorkList.push_back(T.Val);
+  return DAG.getNode(ISD::ADD, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, return
+/// a DAG expression that computes the same value by multiplying by a magic
+/// number.  Returns an empty SDOperand if VT or MULHU is unsupported.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+  MVT::ValueType VT = N->getValueType(0);
+  
+  // Check to see if we can do this.
+  if (!TLI.isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+    return SDOperand();       // BuildUDIV only operates on i32 or i64
+  if (!TLI.isOperationLegal(ISD::MULHU, VT))
+    return SDOperand();       // Make sure the target supports MULHU.
+  
+  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  WorkList.push_back(Q.Val);
+
+  if (magics.a == 0) {  // no add needed: result is Q >> s
+    return DAG.getNode(ISD::SRL, VT, Q, 
+                       DAG.getConstant(magics.s, TLI.getShiftAmountTy()));
+  } else {  // add indicator set: result is (((N0 - Q) >> 1) + Q) >> (s - 1)
+    SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
+    WorkList.push_back(NPQ.Val);
+    NPQ = DAG.getNode(ISD::SRL, VT, NPQ, 
+                      DAG.getConstant(1, TLI.getShiftAmountTy()));
+    WorkList.push_back(NPQ.Val);
+    NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
+    WorkList.push_back(NPQ.Val);
+    return DAG.getNode(ISD::SRL, VT, NPQ, 
+                       DAG.getConstant(magics.s-1, TLI.getShiftAmountTy()));
+  }
+}
+
 // SelectionDAG::Combine - This is the entry point for the file.
 //
 void SelectionDAG::Combine(bool RunningAfterLegalize) {


Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.201 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.201.2.1
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.201	Mon Oct 17 19:27:41 2005
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	Wed Nov 16 12:32:15 2005
@@ -394,7 +394,6 @@
   
   // Now that we have N in, add anything that uses it if all of their operands
   // are now done.
-  
   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;++UI)
     ComputeTopDownOrdering(*UI, Order, Visited);
 }
@@ -414,13 +413,15 @@
   // entry node) that have no operands.
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
        E = DAG.allnodes_end(); I != E; ++I) {
-    if ((*I)->getNumOperands() == 0) {
-      Visited[*I] = 0 - 1U;
-      ComputeTopDownOrdering(*I, Order, Visited);
+    if (I->getNumOperands() == 0) {
+      Visited[I] = 0 - 1U;
+      ComputeTopDownOrdering(I, Order, Visited);
     }
   }
   
-  assert(Order.size() == Visited.size() && Order.size() == DAG.allnodes_size()&&
+  assert(Order.size() == Visited.size() &&
+         Order.size() == 
+            (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) &&
          "Error: DAG is cyclic!");
   Visited.clear();
   
@@ -632,19 +633,26 @@
     }
     break;
   }
-  case ISD::TokenFactor: {
-    std::vector<SDOperand> Ops;
-    bool Changed = false;
-    // Legalize the operands
-    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
-      SDOperand Op = Node->getOperand(i);
-      Ops.push_back(LegalizeOp(Op));
-      Changed |= Ops[i] != Op;
+  case ISD::TokenFactor:
+    if (Node->getNumOperands() == 2) {
+      bool Changed = false;
+      SDOperand Op0 = LegalizeOp(Node->getOperand(0));
+      SDOperand Op1 = LegalizeOp(Node->getOperand(1));
+      if (Op0 != Node->getOperand(0) || Op1 != Node->getOperand(1))
+        Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Op0, Op1);
+    } else {
+      std::vector<SDOperand> Ops;
+      bool Changed = false;
+      // Legalize the operands.
+      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+        SDOperand Op = Node->getOperand(i);
+        Ops.push_back(LegalizeOp(Op));
+        Changed |= Ops[i] != Op;
+      }
+      if (Changed)
+        Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Ops);
     }
-    if (Changed)
-      Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Ops);
     break;
-  }
 
   case ISD::CALLSEQ_START:
   case ISD::CALLSEQ_END:
@@ -955,14 +963,37 @@
     }
     assert(0 && "Unreachable");
   }
-  case ISD::EXTRACT_ELEMENT:
-    // Get both the low and high parts.
-    ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
-    if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
-      Result = Tmp2;  // 1 -> Hi
-    else
-      Result = Tmp1;  // 0 -> Lo
+  case ISD::EXTRACT_ELEMENT: {
+    MVT::ValueType OpTy = Node->getOperand(0).getValueType();
+    switch (getTypeAction(OpTy)) {
+    default:
+      assert(0 && "EXTRACT_ELEMENT action for type unimplemented!");
+      break;
+    case Legal:
+      if (cast<ConstantSDNode>(Node->getOperand(1))->getValue()) {
+        // 1 -> Hi
+        Result = DAG.getNode(ISD::SRL, OpTy, Node->getOperand(0),
+                             DAG.getConstant(MVT::getSizeInBits(OpTy)/2, 
+                                             TLI.getShiftAmountTy()));
+        Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Result);
+      } else {
+        // 0 -> Lo
+        Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), 
+                             Node->getOperand(0));
+      }
+      Result = LegalizeOp(Result);
+      break;
+    case Expand:
+      // Get both the low and high parts.
+      ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+      if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
+        Result = Tmp2;  // 1 -> Hi
+      else
+        Result = Tmp1;  // 0 -> Lo
+      break;
+    }
     break;
+  }
 
   case ISD::CopyToReg:
     Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
@@ -1091,6 +1122,11 @@
     if (Tmp1 != Node->getOperand(0))
       Result = DAG.getNode(ISD::PCMARKER, MVT::Other, Tmp1,Node->getOperand(1));
     break;
+  case ISD::READCYCLECOUNTER:
+    Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain
+    if (Tmp1 != Node->getOperand(0))
+      Result = DAG.getNode(ISD::READCYCLECOUNTER, MVT::i64, Tmp1);
+    break;
   case ISD::TRUNCSTORE:
     Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
     Tmp3 = LegalizeOp(Node->getOperand(2));  // Legalize the pointer.
@@ -2243,7 +2279,8 @@
     // legal, such as PowerPC.
     if (Node->getOpcode() == ISD::FP_TO_UINT && 
         !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
-        TLI.isOperationLegal(ISD::FP_TO_SINT, NVT)) {
+        (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
+         TLI.getOperationAction(ISD::FP_TO_SINT, NVT)==TargetLowering::Custom)){
       Result = DAG.getNode(ISD::FP_TO_SINT, NVT, Tmp1);
     } else {
       Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);


Index: llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.37 llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.37.2.1
--- llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.37	Thu Oct 13 11:44:00 2005
+++ llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp	Wed Nov 16 12:32:15 2005
@@ -2,7 +2,7 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by Chris Lattner and is distributed under the
+// This file was developed by James M. Laskey and is distributed under the
 // University of Illinois Open Source License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -32,6 +33,7 @@
   enum ScheduleChoices {
     noScheduling,
     simpleScheduling,
+    simpleNoItinScheduling
   };
 } // namespace
 
@@ -43,6 +45,8 @@
               "Trivial emission with no analysis"),
     clEnumValN(simpleScheduling, "simple",
               "Minimize critical path and maximize processor utilization"),
+    clEnumValN(simpleNoItinScheduling, "simple-noitin",
+              "Same as simple except using generic latency"),
    clEnumValEnd));
 
 
@@ -97,65 +101,74 @@
   typedef typename std::vector<T>::iterator Iter;
                                         // Tally iterator 
   
-  /// AllInUse - Test to see if all of the resources in the slot are busy (set.)
-  inline bool AllInUse(Iter Cursor, unsigned ResourceSet) {
-    return (*Cursor & ResourceSet) == ResourceSet;
-  }
-
-  /// Skip - Skip over slots that use all of the specified resource (all are
-  /// set.)
-  Iter Skip(Iter Cursor, unsigned ResourceSet) {
-    assert(ResourceSet && "At least one resource bit needs to bet set");
-    
-    // Continue to the end
-    while (true) {
-      // Break out if one of the resource bits is not set
-      if (!AllInUse(Cursor, ResourceSet)) return Cursor;
-      // Try next slot
-      Cursor++;
-      assert(Cursor < Tally.end() && "Tally is not large enough for schedule");
-    }
-  }
-  
-  /// FindSlots - Starting from Begin, locate N consecutive slots where at least 
-  /// one of the resource bits is available.  Returns the address of first slot.
-  Iter FindSlots(Iter Begin, unsigned N, unsigned ResourceSet,
-                                         unsigned &Resource) {
-    // Track position      
-    Iter Cursor = Begin;
+  /// SlotsAvailable - Returns true if all units are available.
+	///
+  bool SlotsAvailable(Iter Begin, unsigned N, unsigned ResourceSet,
+                                              unsigned &Resource) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
     
-    // Try all possible slots forward
-    while (true) {
-      // Skip full slots
-      Cursor = Skip(Cursor, ResourceSet);
-      // Determine end of interval
-      Iter End = Cursor + N;
-      assert(End <= Tally.end() && "Tally is not large enough for schedule");
+    // Iterate thru each resource
+    BitsIterator<T> Resources(ResourceSet & ~*Begin);
+    while (unsigned Res = Resources.Next()) {
+      // Check if resource is available for next N slots
+      Iter Interval = End;
+      do {
+        Interval--;
+        if (*Interval & Res) break;
+      } while (Interval != Begin);
       
-      // Iterate thru each resource
-      BitsIterator<T> Resources(ResourceSet & ~*Cursor);
-      while (unsigned Res = Resources.Next()) {
-        // Check if resource is available for next N slots
-        // Break out if resource is busy
-        Iter Interval = Cursor;
-        for (; Interval < End && !(*Interval & Res); Interval++) {}
-        
-        // If available for interval, return where and which resource
-        if (Interval == End) {
-          Resource = Res;
-          return Cursor;
-        }
-        // Otherwise, check if worth checking other resources
-        if (AllInUse(Interval, ResourceSet)) {
-          // Start looking beyond interval
-          Cursor = Interval;
-          break;
-        }
+      // If available for N
+      if (Interval == Begin) {
+        // Success
+        Resource = Res;
+        return true;
       }
-      Cursor++;
     }
+    
+    // No luck
+    Resource = 0;
+    return false;
+  }
+	
+	/// RetrySlot - Scans [Begin, Begin+N) backwards for a slot to retry from.
+  Iter RetrySlot(Iter Begin, unsigned N, unsigned ResourceSet) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+		
+		while (Begin != End--) {
+			// Drop the units that are busy (set) in this slot
+			ResourceSet &= ~*End;
+			// No candidate unit remains: resume just past this slot
+			if (!ResourceSet) return End + 1;
+		}
+		// Some unit stayed clear across the whole interval
+		return Begin;
+	}
+  
+  /// FindAndReserveStages - Return true if the stages [Stage, StageEnd) can be
+  /// completed back to back starting at Begin. If so mark their slots as busy.
+  bool FindAndReserveStages(Iter Begin,
+                            InstrStage *Stage, InstrStage *StageEnd) {
+    // Past the last stage: every stage fit, so we're done
+    if (Stage == StageEnd) return true;
+    // Get number of cycles for current stage
+    unsigned N = Stage->Cycles;
+    // Check to see if N slots are available, if not fail
+    unsigned Resource;
+    if (!SlotsAvailable(Begin, N, Stage->Units, Resource)) return false;
+    // Check to see if remaining stages are available, if not fail
+    if (!FindAndReserveStages(Begin + N, Stage + 1, StageEnd)) return false;
+    // Reserve only once the later stages fit, so a failure reserves nothing
+    Reserve(Begin, N, Resource);
+    // Success
+    return true;
   }
-  
+
   /// Reserve - Mark busy (set) the specified N slots.
   void Reserve(Iter Begin, unsigned N, unsigned Resource) {
     // Determine end of interval
@@ -167,24 +180,39 @@
       *Begin |= Resource;
   }
 
+  /// FindSlots - Starting from Begin, locate consecutive slots where all stages
+  /// can be completed.  Returns the address of first slot.
+  Iter FindSlots(Iter Begin, InstrStage *StageBegin, InstrStage *StageEnd) {
+    // Track position      
+    Iter Cursor = Begin;
+    
+    // Try all possible slots forward
+    while (true) {
+      // Try at cursor, if successful return position.
+      if (FindAndReserveStages(Cursor, StageBegin, StageEnd)) return Cursor;
+      // Locate a better position
+			Cursor = RetrySlot(Cursor + 1, StageBegin->Cycles, StageBegin->Units);
+    }
+  }
+  
 public:
   /// Initialize - Resize and zero the tally to the specified number of time
   /// slots.
   inline void Initialize(unsigned N) {
     Tally.assign(N, 0);   // Initialize tally to all zeros.
   }
-  
-  // FindAndReserve - Locate and mark busy (set) N bits started at slot I, using
-  // ResourceSet for choices.
-  unsigned FindAndReserve(unsigned I, unsigned N, unsigned ResourceSet) {
-    // Which resource used
-    unsigned Resource;
-    // Find slots for instruction.
-    Iter Where = FindSlots(Tally.begin() + I, N, ResourceSet, Resource);
-    // Reserve the slots
-    Reserve(Where, N, Resource);
-    // Return time slot (index)
-    return Where - Tally.begin();
+
+  // FindAndReserve - Locate an ideal slot for the specified stages and mark
+  // as busy.
+  unsigned FindAndReserve(unsigned Slot, InstrStage *StageBegin,
+                                         InstrStage *StageEnd) {
+		// Where to begin 
+		Iter Begin = Tally.begin() + Slot;
+		// Find a free slot
+		Iter Where = FindSlots(Begin, StageBegin, StageEnd);
+		// Distance is slot number
+		unsigned Final = Where - Tally.begin();
+    return Final;
   }
 
 };
@@ -192,27 +220,46 @@
 
 // Forward
 class NodeInfo;
-typedef std::vector<NodeInfo *>           NIVector;
-typedef std::vector<NodeInfo *>::iterator NIIterator;
+typedef NodeInfo *NodeInfoPtr;
+typedef std::vector<NodeInfoPtr>           NIVector;
+typedef std::vector<NodeInfoPtr>::iterator NIIterator;
 
 //===----------------------------------------------------------------------===//
 ///
 /// Node group -  This struct is used to manage flagged node groups.
 ///
-class NodeGroup : public NIVector {
+class NodeGroup {
 private:
+  NIVector      Members;                // Group member nodes
+  NodeInfo      *Dominator;             // Node with highest latency
+  unsigned      Latency;                // Total latency of the group
   int           Pending;                // Number of visits pending before
                                         //    adding to order  
 
 public:
   // Ctor.
-  NodeGroup() : Pending(0) {}
+  NodeGroup() : Dominator(NULL), Pending(0) {}
   
   // Accessors
-  inline NodeInfo *getLeader() { return empty() ? NULL : front(); }
+  inline void setDominator(NodeInfo *D) { Dominator = D; }
+  inline NodeInfo *getDominator() { return Dominator; }
+  inline void setLatency(unsigned L) { Latency = L; }
+  inline unsigned getLatency() { return Latency; }
   inline int getPending() const { return Pending; }
   inline void setPending(int P)  { Pending = P; }
   inline int addPending(int I)  { return Pending += I; }
+  
+  // Pass thru
+  inline bool group_empty() { return Members.empty(); }
+  inline NIIterator group_begin() { return Members.begin(); }
+  inline NIIterator group_end() { return Members.end(); }
+  inline void group_push_back(const NodeInfoPtr &NI) { Members.push_back(NI); }
+  inline NIIterator group_insert(NIIterator Pos, const NodeInfoPtr &NI) {
+    return Members.insert(Pos, NI);
+  }
+  inline void group_insert(NIIterator Pos, NIIterator First, NIIterator Last) {
+    Members.insert(Pos, First, Last);
+  }
 
   static void Add(NodeInfo *D, NodeInfo *U);
   static unsigned CountInternalUses(NodeInfo *D, NodeInfo *U);
@@ -230,8 +277,9 @@
                                         //    adding to order
 public:
   SDNode        *Node;                  // DAG node
-  unsigned      Latency;                // Cycles to complete instruction
-  unsigned      ResourceSet;            // Bit vector of usable resources
+  InstrStage    *StageBegin;            // First stage in itinerary
+  InstrStage    *StageEnd;              // Last+1 stage in itinerary
+  unsigned      Latency;                // Total cycles to complete instruction
   bool          IsCall;                 // Is function call
   unsigned      Slot;                   // Node's time slot
   NodeGroup     *Group;                 // Grouping information
@@ -244,8 +292,9 @@
   NodeInfo(SDNode *N = NULL)
   : Pending(0)
   , Node(N)
+  , StageBegin(NULL)
+  , StageEnd(NULL)
   , Latency(0)
-  , ResourceSet(0)
   , IsCall(false)
   , Slot(0)
   , Group(NULL)
@@ -257,11 +306,11 @@
   
   // Accessors
   inline bool isInGroup() const {
-    assert(!Group || !Group->empty() && "Group with no members");
+    assert(!Group || !Group->group_empty() && "Group with no members");
     return Group != NULL;
   }
-  inline bool isGroupLeader() const {
-     return isInGroup() && Group->getLeader() == this;
+  inline bool isGroupDominator() const {
+     return isInGroup() && Group->getDominator() == this;
   }
   inline int getPending() const {
     return Group ? Group->getPending() : Pending;
@@ -298,8 +347,8 @@
     if (N->isInGroup()) {
       // get Group
       NodeGroup *Group = NI->Group;
-      NGI = Group->begin();
-      NGE = Group->end();
+      NGI = Group->group_begin();
+      NGE = Group->group_end();
       // Prevent this node from being used (will be in members list
       NI = NULL;
     }
@@ -353,7 +402,8 @@
   
 public:
   // Ctor.
-  NodeGroupOpIterator(NodeInfo *N) : NI(N), GI(N) {}
+  NodeGroupOpIterator(NodeInfo *N)
+    : NI(N), GI(N), OI(SDNode::op_iterator()), OE(SDNode::op_iterator()) {}
   
   /// isEnd - Returns true when not more operands are available.
   ///
@@ -375,15 +425,6 @@
 ///
 class SimpleSched {
 private:
-  // TODO - get ResourceSet from TII
-  enum {
-    RSInteger = 0x3,                    // Two integer units
-    RSFloat = 0xC,                      // Two float units
-    RSLoadStore = 0x30,                 // Two load store units
-    RSBranch = 0x400,                   // One branch unit
-    RSOther = 0                         // Processing unit independent
-  };
-  
   MachineBasicBlock *BB;                // Current basic block
   SelectionDAG &DAG;                    // DAG of the current basic block
   const TargetMachine &TM;              // Target processor
@@ -392,6 +433,7 @@
   SSARegMap *RegMap;                    // Virtual/real register map
   MachineConstantPool *ConstPool;       // Target constant pool
   unsigned NodeCount;                   // Number of nodes in DAG
+  bool HasGroups;                       // True if there are any groups
   NodeInfo *Info;                       // Info for nodes being scheduled
   std::map<SDNode *, NodeInfo *> Map;   // Map nodes to info
   NIVector Ordering;                    // Emit ordering of nodes
@@ -406,7 +448,7 @@
     : BB(bb), DAG(D), TM(D.getTarget()), TII(*TM.getInstrInfo()),
       MRI(*TM.getRegisterInfo()), RegMap(BB->getParent()->getSSARegMap()),
       ConstPool(BB->getParent()->getConstantPool()),
-      NodeCount(0), Info(NULL), Map(), Tally(), NSlots(0) {
+      NodeCount(0), HasGroups(false), Info(NULL), Map(), Tally(), NSlots(0) {
     assert(&TII && "Target doesn't provide instr info?");
     assert(&MRI && "Target doesn't provide register info?");
   }
@@ -439,6 +481,7 @@
   void Schedule();
   void IdentifyGroups();
   void GatherSchedulingInfo();
+  void FakeGroupDominators(); 
   void PrepareNodeInfo();
   bool isStrongDependency(NodeInfo *A, NodeInfo *B);
   bool isWeakDependency(NodeInfo *A, NodeInfo *B);
@@ -458,6 +501,27 @@
   inline void dump(const char *tag) const { std::cerr << tag; dump(); }
   void dump() const;
 };
+
+
+//===----------------------------------------------------------------------===//
+/// Special case itineraries.
+///
+enum {
+  CallLatency = 40,          // To push calls back in time
+
+  RSInteger   = 0xC0000000,  // Two integer units
+  RSFloat     = 0x30000000,  // Two float units
+  RSLoadStore = 0x0C000000,  // Two load store units
+  RSBranch    = 0x02000000   // One branch unit
+};
+static InstrStage CallStage  = { CallLatency, RSBranch };
+static InstrStage LoadStage  = { 5, RSLoadStore };
+static InstrStage StoreStage = { 2, RSLoadStore };
+static InstrStage IntStage   = { 2, RSInteger };
+static InstrStage FloatStage = { 3, RSFloat };
+//===----------------------------------------------------------------------===//
+
+
 //===----------------------------------------------------------------------===//
 
 } // namespace
@@ -491,7 +555,8 @@
       }
     }
     // Merge the two lists
-    DGroup->insert(DGroup->end(), UGroup->begin(), UGroup->end());
+    DGroup->group_insert(DGroup->group_end(),
+                         UGroup->group_begin(), UGroup->group_end());
   } else if (DGroup) {
     // Make user member of definers group
     U->Group = DGroup;
@@ -503,7 +568,7 @@
       // Remove internal edges
       DGroup->addPending(-CountInternalUses(DNI, U));
     }
-    DGroup->push_back(U);
+    DGroup->group_push_back(U);
   } else if (UGroup) {
     // Make definer member of users group
     D->Group = UGroup;
@@ -515,13 +580,13 @@
       // Remove internal edges
       UGroup->addPending(-CountInternalUses(D, UNI));
     }
-    UGroup->insert(UGroup->begin(), D);
+    UGroup->group_insert(UGroup->group_begin(), D);
   } else {
     D->Group = U->Group = DGroup = new NodeGroup();
     DGroup->addPending(D->Node->use_size() + U->Node->use_size() -
                        CountInternalUses(D, U));
-    DGroup->push_back(D);
-    DGroup->push_back(U);
+    DGroup->group_push_back(D);
+    DGroup->group_push_back(U);
   }
 }
 
@@ -529,10 +594,11 @@
 ///
 unsigned NodeGroup::CountInternalUses(NodeInfo *D, NodeInfo *U) {
   unsigned N = 0;
-  for (SDNode:: use_iterator UI = D->Node->use_begin(),
-                             E = D->Node->use_end(); UI != E; UI++) {
-    if (*UI == U->Node) N++;
+  for (unsigned M = U->Node->getNumOperands(); 0 < M--;) {
+    SDOperand Op = U->Node->getOperand(M);
+    if (Op.Val == D->Node) N++;
   }
+
   return N;
 }
 //===----------------------------------------------------------------------===//
@@ -587,9 +653,9 @@
 /// IncludeNode - Add node to NodeInfo vector.
 ///
 void SimpleSched::IncludeNode(NodeInfo *NI) {
-  // Get node
-  SDNode *Node = NI->Node;
-  // Ignore entry node
+// Get node
+SDNode *Node = NI->Node;
+// Ignore entry node
 if (Node->getOpcode() == ISD::EntryToken) return;
   // Check current count for node
   int Count = NI->getPending();
@@ -601,7 +667,7 @@
   if (!Count) {
     // Add node
     if (NI->isInGroup()) {
-      Ordering.push_back(NI->Group->getLeader());
+      Ordering.push_back(NI->Group->getDominator());
     } else {
       Ordering.push_back(NI);
     }
@@ -662,6 +728,8 @@
       if (Op.getValueType() != MVT::Flag) break;
       // Add to node group
       NodeGroup::Add(getNI(Op.Val), NI);
+      // Let everyone else know
+      HasGroups = true;
     }
   }
 }
@@ -669,8 +737,8 @@
 /// GatherSchedulingInfo - Get latency and resource information about each node.
 ///
 void SimpleSched::GatherSchedulingInfo() {
-  // Track if groups are present
-  bool AreGroups = false;
+  // Get instruction itineraries for the target
+  const InstrItineraryData InstrItins = TM.getInstrItineraryData();
   
   // For each node
   for (unsigned i = 0, N = NodeCount; i < N; i++) {
@@ -678,90 +746,87 @@
     NodeInfo* NI = &Info[i];
     SDNode *Node = NI->Node;
     
-    // Test for groups
-    if (NI->isInGroup()) AreGroups = true;
-
-    // FIXME: Pretend by using value type to choose metrics
-    MVT::ValueType VT = Node->getValueType(0);
-    
-    // If machine opcode
-    if (Node->isTargetOpcode()) {
-      MachineOpCode TOpc = Node->getTargetOpcode();
-      // FIXME: This is an ugly (but temporary!) hack to test the scheduler
-      // before we have real target info.
-      // FIXME NI->Latency = std::max(1, TII.maxLatency(TOpc));
-      // FIXME NI->ResourceSet = TII.resources(TOpc);
-      if (TII.isCall(TOpc)) {
-        NI->ResourceSet = RSBranch;
-        NI->Latency = 40;
-        NI->IsCall = true;
-      } else if (TII.isLoad(TOpc)) {
-        NI->ResourceSet = RSLoadStore;
-        NI->Latency = 5;
-      } else if (TII.isStore(TOpc)) {
-        NI->ResourceSet = RSLoadStore;
-        NI->Latency = 2;
-      } else if (MVT::isInteger(VT)) {
-        NI->ResourceSet = RSInteger;
-        NI->Latency = 2;
-      } else if (MVT::isFloatingPoint(VT)) {
-        NI->ResourceSet = RSFloat;
-        NI->Latency = 3;
-      } else {
-        NI->ResourceSet = RSOther;
-        NI->Latency = 0;
-      }
-    } else {
-      if (MVT::isInteger(VT)) {
-        NI->ResourceSet = RSInteger;
-        NI->Latency = 2;
-      } else if (MVT::isFloatingPoint(VT)) {
-        NI->ResourceSet = RSFloat;
-        NI->Latency = 3;
-      } else {
-        NI->ResourceSet = RSOther;
-        NI->Latency = 0;
+    // If there are no itineraries, or scheduling without them was selected
+    if (InstrItins.isEmpty() || ScheduleStyle == simpleNoItinScheduling) {
+      // If machine opcode
+      if (Node->isTargetOpcode()) {
+        // Get return type to guess which processing unit 
+        MVT::ValueType VT = Node->getValueType(0);
+        // Get machine opcode
+        MachineOpCode TOpc = Node->getTargetOpcode();
+        NI->IsCall = TII.isCall(TOpc);
+
+        if (TII.isLoad(TOpc))              NI->StageBegin = &LoadStage;
+        else if (TII.isStore(TOpc))        NI->StageBegin = &StoreStage;
+        else if (MVT::isInteger(VT))       NI->StageBegin = &IntStage;
+        else if (MVT::isFloatingPoint(VT)) NI->StageBegin = &FloatStage;
+        if (NI->StageBegin) NI->StageEnd = NI->StageBegin + 1;
       }
+    } else if (Node->isTargetOpcode()) {
+      // get machine opcode
+      MachineOpCode TOpc = Node->getTargetOpcode();
+      // Check to see if it is a call
+      NI->IsCall = TII.isCall(TOpc);
+      // Get itinerary stages for instruction
+      unsigned II = TII.getSchedClass(TOpc);
+      NI->StageBegin = InstrItins.begin(II);
+      NI->StageEnd = InstrItins.end(II);
+    }
+    
+    // One slot for the instruction itself
+    NI->Latency = 1;
+    
+    // Add long latency for a call to push it back in time
+    if (NI->IsCall) NI->Latency += CallLatency;
+    
+    // Sum up all the latencies
+    for (InstrStage *Stage = NI->StageBegin, *E = NI->StageEnd;
+        Stage != E; Stage++) {
+      NI->Latency += Stage->Cycles;
     }
     
-    // Add one slot for the instruction itself
-    NI->Latency++;
-    
     // Sum up all the latencies for max tally size
     NSlots += NI->Latency;
   }
   
   // Unify metrics if in a group
-  if (AreGroups) {
+  if (HasGroups) {
     for (unsigned i = 0, N = NodeCount; i < N; i++) {
       NodeInfo* NI = &Info[i];
       
-      if (NI->isGroupLeader()) {
+      if (NI->isInGroup()) {
         NodeGroup *Group = NI->Group;
-        unsigned Latency = 0;
-        unsigned MaxLat = 0;
-        unsigned ResourceSet = 0;
-        bool IsCall = false;
         
-        for (NIIterator NGI = Group->begin(), NGE = Group->end();
-             NGI != NGE; NGI++) {
-          NodeInfo* NGNI = *NGI;
-          Latency += NGNI->Latency;
-          IsCall = IsCall || NGNI->IsCall;
+        if (!Group->getDominator()) {
+          NIIterator NGI = Group->group_begin(), NGE = Group->group_end();
+          NodeInfo *Dominator = *NGI;
+          unsigned Latency = 0;
           
-          if (MaxLat < NGNI->Latency) {
-            MaxLat = NGNI->Latency;
-            ResourceSet = NGNI->ResourceSet;
+          for (NGI++; NGI != NGE; NGI++) {
+            NodeInfo* NGNI = *NGI;
+            Latency += NGNI->Latency;
+            if (Dominator->Latency < NGNI->Latency) Dominator = NGNI;
           }
           
-          NGNI->Latency = 0;
-          NGNI->ResourceSet = 0;
-          NGNI->IsCall = false;
+          Dominator->Latency = Latency;
+          Group->setDominator(Dominator);
         }
-        
-        NI->Latency = Latency;
-        NI->ResourceSet = ResourceSet;
-        NI->IsCall = IsCall;
+      }
+    }
+  }
+}
+
+/// FakeGroupDominators - Set group dominators when scheduling is skipped.
+/// 
+void SimpleSched::FakeGroupDominators() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+    
+    if (NI->isInGroup()) {
+      NodeGroup *Group = NI->Group;
+      
+      if (!Group->getDominator()) {
+        Group->setDominator(NI);
       }
     }
   }
@@ -772,21 +837,18 @@
 void SimpleSched::PrepareNodeInfo() {
   // Allocate node information
   Info = new NodeInfo[NodeCount];
-  // Get base of all nodes table
-  SelectionDAG::allnodes_iterator AllNodes = DAG.allnodes_begin();
-  
-  // For each node being scheduled
-  for (unsigned i = 0, N = NodeCount; i < N; i++) {
-    // Get next node from DAG all nodes table
-    SDNode *Node = AllNodes[i];
+
+  unsigned i = 0;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I, ++i) {
     // Fast reference to node schedule info
     NodeInfo* NI = &Info[i];
     // Set up map
-    Map[Node] = NI;
+    Map[I] = NI;
     // Set node
-    NI->Node = Node;
+    NI->Node = I;
     // Set pending visit count
-    NI->setPending(Node->use_size());    
+    NI->setPending(I->use_size());
   }
 }
 
@@ -798,7 +860,8 @@
 }
 
 /// isWeakDependency Return true if node A produces a result that will
-/// conflict with operands of B.
+/// conflict with operands of B.  It is assumed that we have called
+/// isStrongDependency prior.
 bool SimpleSched::isWeakDependency(NodeInfo *A, NodeInfo *B) {
   // TODO check for conflicting real registers and aliases
 #if 0 // FIXME - Since we are in SSA form and not checking register aliasing
@@ -843,9 +906,11 @@
     // If independent of others (or first entry)
     if (Slot == NotFound) Slot = 0;
     
+#if 0 // FIXME - measure later
     // Find a slot where the needed resources are available
-    if (NI->ResourceSet)
-      Slot = Tally.FindAndReserve(Slot, NI->Latency, NI->ResourceSet);
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+#endif
       
     // Set node slot
     NI->Slot = Slot;
@@ -899,8 +964,8 @@
     if (Slot == NotFound) Slot = 0;
     
     // Find a slot where the needed resources are available
-    if (NI->ResourceSet)
-      Slot = Tally.FindAndReserve(Slot, NI->Latency, NI->ResourceSet);
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
       
     // Set node slot
     NI->Slot = Slot;
@@ -930,7 +995,7 @@
     // Iterate through nodes
     NodeGroupIterator NGI(Ordering[i]);
     if (NI->isInGroup()) {
-      if (NI->isGroupLeader()) {
+      if (NI->isGroupDominator()) {
         NodeGroupIterator NGI(Ordering[i]);
         while (NodeInfo *NI = NGI.next()) EmitNode(NI);
       }
@@ -1006,7 +1071,28 @@
     
     // Add result register values for things that are defined by this
     // instruction.
-    if (NumResults) VRBase = CreateVirtualRegisters(MI, NumResults, II);
+    
+    // If the node is used by a CopyToReg whose dest reg is a vreg, use the
+    // CopyToReg'd destination register instead of creating a new vreg.
+    if (NumResults == 1) {
+      for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+           UI != E; ++UI) {
+        SDNode *Use = *UI;
+        if (Use->getOpcode() == ISD::CopyToReg && 
+            Use->getOperand(2).Val == Node) {
+          unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+          if (MRegisterInfo::isVirtualRegister(Reg)) {
+            VRBase = Reg;
+            MI->addRegOperand(Reg, MachineOperand::Def);
+            break;
+          }
+        }
+      }
+    }
+    
+    // Otherwise, create new virtual registers.
+    if (NumResults && VRBase == 0)
+      VRBase = CreateVirtualRegisters(MI, NumResults, II);
     
     // Emit all of the actual operands of this instruction, adding them to the
     // instruction as appropriate.
@@ -1084,10 +1170,11 @@
     case ISD::TokenFactor:
       break;
     case ISD::CopyToReg: {
-      unsigned Val = getVR(Node->getOperand(2));
-      MRI.copyRegToReg(*BB, BB->end(),
-                       cast<RegisterSDNode>(Node->getOperand(1))->getReg(), Val,
-                       RegMap->getRegClass(Val));
+      unsigned InReg = getVR(Node->getOperand(2));
+      unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (InReg != DestReg)   // Coalesced away the copy?
+        MRI.copyRegToReg(*BB, BB->end(), DestReg, InReg,
+                         RegMap->getRegClass(InReg));
       break;
     }
     case ISD::CopyFromReg: {
@@ -1097,21 +1184,40 @@
         break;
       }
 
+      // If the node is used by a CopyToReg whose dest reg is a vreg, use the
+      // CopyToReg'd destination register instead of creating a new vreg.
+      for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+           UI != E; ++UI) {
+        SDNode *Use = *UI;
+        if (Use->getOpcode() == ISD::CopyToReg && 
+            Use->getOperand(2).Val == Node) {
+          unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+          if (MRegisterInfo::isVirtualRegister(DestReg)) {
+            VRBase = DestReg;
+            break;
+          }
+        }
+      }
+
       // Figure out the register class to create for the destreg.
       const TargetRegisterClass *TRC = 0;
+      if (VRBase) {
+        TRC = RegMap->getRegClass(VRBase);
+      } else {
 
-      // Pick the register class of the right type that contains this physreg.
-      for (MRegisterInfo::regclass_iterator I = MRI.regclass_begin(),
-           E = MRI.regclass_end(); I != E; ++I)
-        if ((*I)->getType() == Node->getValueType(0) &&
-            (*I)->contains(SrcReg)) {
-          TRC = *I;
-          break;
-        }
-      assert(TRC && "Couldn't find register class for reg copy!");
+        // Pick the register class of the right type that contains this physreg.
+        for (MRegisterInfo::regclass_iterator I = MRI.regclass_begin(),
+             E = MRI.regclass_end(); I != E; ++I)
+          if ((*I)->getType() == Node->getValueType(0) &&
+              (*I)->contains(SrcReg)) {
+            TRC = *I;
+            break;
+          }
+        assert(TRC && "Couldn't find register class for reg copy!");
       
-      // Create the reg, emit the copy.
-      VRBase = RegMap->createVirtualRegister(TRC);
+        // Create the reg, emit the copy.
+        VRBase = RegMap->createVirtualRegister(TRC);
+      }
       MRI.copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
       break;
     }
@@ -1126,11 +1232,23 @@
 ///
 void SimpleSched::Schedule() {
   // Number the nodes
-  NodeCount = DAG.allnodes_size();
-  // Set up minimum info for scheduling.
+  NodeCount = std::distance(DAG.allnodes_begin(), DAG.allnodes_end());
+  // Test to see if scheduling should occur
+  bool ShouldSchedule = NodeCount > 3 && ScheduleStyle != noScheduling;
+  // Set up minimum info for scheduling
   PrepareNodeInfo();
   // Construct node groups for flagged nodes
   IdentifyGroups();
+
+  // Don't waste time if is only entry and return
+  if (ShouldSchedule) {
+    // Get latency and resource requirements
+    GatherSchedulingInfo();
+  } else if (HasGroups) {
+    // Make sure all the groups have dominators
+    FakeGroupDominators();
+  }
+
   // Breadth first walk of DAG
   VisitAll();
 
@@ -1144,10 +1262,7 @@
 #endif  
   
   // Don't waste time if is only entry and return
-  if (NodeCount > 3 && ScheduleStyle != noScheduling) {
-    // Get latency and resource requirements
-    GatherSchedulingInfo();
-    
+  if (ShouldSchedule) {
     // Push back long instructions and critical path
     ScheduleBackward();
     
@@ -1182,9 +1297,9 @@
       std::cerr << "  " << NI->Preorder << ". ";
       printSI(std::cerr, NI);
       std::cerr << "\n";
-      if (NI->isGroupLeader()) {
+      if (NI->isGroupDominator()) {
         NodeGroup *Group = NI->Group;
-        for (NIIterator NII = Group->begin(), E = Group->end();
+        for (NIIterator NII = Group->group_begin(), E = Group->group_end();
              NII != E; NII++) {
           std::cerr << "          ";
           printSI(std::cerr, *NII);
@@ -1205,7 +1320,6 @@
   SDNode *Node = NI->Node;
   O << " "
     << std::hex << Node << std::dec
-    << ", RS=" << NI->ResourceSet
     << ", Lat=" << NI->Latency
     << ", Slot=" << NI->Slot
     << ", ARITY=(" << Node->getNumOperands() << ","
@@ -1226,9 +1340,9 @@
     NodeInfo *NI = Ordering[i];
     printSI(O, NI);
     O << "\n";
-    if (NI->isGroupLeader()) {
+    if (NI->isGroupDominator()) {
       NodeGroup *Group = NI->Group;
-      for (NIIterator NII = Group->begin(), E = Group->end();
+      for (NIIterator NII = Group->group_begin(), E = Group->group_end();
            NII != E; NII++) {
         O << "    ";
         printSI(O, *NII);


Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.206 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.206.2.1
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.206	Wed Oct 12 22:11:28 2005
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp	Wed Nov 16 12:32:15 2005
@@ -167,63 +167,63 @@
 /// chain but no other uses and no side effect.  If a node is passed in as an
 /// argument, it is used as the seed for node deletion.
 void SelectionDAG::RemoveDeadNodes(SDNode *N) {
-  std::set<SDNode*> AllNodeSet(AllNodes.begin(), AllNodes.end());
-
   // Create a dummy node (which is not added to allnodes), that adds a reference
   // to the root node, preventing it from being deleted.
   HandleSDNode Dummy(getRoot());
 
+  bool MadeChange = false;
+  
   // If we have a hint to start from, use it.
-  if (N) DeleteNodeIfDead(N, &AllNodeSet);
-
- Restart:
-  unsigned NumNodes = AllNodeSet.size();
-  for (std::set<SDNode*>::iterator I = AllNodeSet.begin(), E = AllNodeSet.end();
-       I != E; ++I) {
-    // Try to delete this node.
-    DeleteNodeIfDead(*I, &AllNodeSet);
-
-    // If we actually deleted any nodes, do not use invalid iterators in
-    // AllNodeSet.
-    if (AllNodeSet.size() != NumNodes)
-      goto Restart;
-  }
-
-  // Restore AllNodes.
-  if (AllNodes.size() != NumNodes)
-    AllNodes.assign(AllNodeSet.begin(), AllNodeSet.end());
+  if (N && N->use_empty()) {
+    DestroyDeadNode(N);
+    MadeChange = true;
+  }
 
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty() && I->getOpcode() != 65535) {
+      // Node is dead, recursively delete newly dead uses.
+      DestroyDeadNode(I);
+      MadeChange = true;
+    }
+  
+  // Walk the nodes list, removing the nodes we've marked as dead.
+  if (MadeChange) {
+    for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ) {
+      SDNode *N = I++;
+      if (N->use_empty())
+        AllNodes.erase(N);
+    }
+  }
+  
   // If the root changed (e.g. it was a dead load, update the root).
   setRoot(Dummy.getValue());
 }
 
-
-void SelectionDAG::DeleteNodeIfDead(SDNode *N, void *NodeSet) {
-  if (!N->use_empty())
-    return;
-
+/// DestroyDeadNode - We know that N is dead.  Nuke it from the CSE maps for the
+/// graph.  If it is the last user of any of its operands, recursively process
+/// them the same way.
+/// 
+void SelectionDAG::DestroyDeadNode(SDNode *N) {
   // Okay, we really are going to delete this node.  First take this out of the
   // appropriate CSE map.
   RemoveNodeFromCSEMaps(N);
   
   // Next, brutally remove the operand list.  This is safe to do, as there are
   // no cycles in the graph.
-  while (!N->Operands.empty()) {
-    SDNode *O = N->Operands.back().Val;
-    N->Operands.pop_back();
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+    SDNode *O = I->Val;
     O->removeUser(N);
     
     // Now that we removed this operand, see if there are no uses of it left.
-    DeleteNodeIfDead(O, NodeSet);
+    if (O->use_empty())
+      DestroyDeadNode(O);
   }
-  
-  // Remove the node from the nodes set and delete it.
-  std::set<SDNode*> &AllNodeSet = *(std::set<SDNode*>*)NodeSet;
-  AllNodeSet.erase(N);
-  
-  // Now that the node is gone, check to see if any of the operands of this node
-  // are dead now.
-  delete N;  
+  delete[] N->OperandList;
+  N->OperandList = 0;
+  N->NumOperands = 0;
+
+  // Mark the node as dead.
+  N->MorphNodeTo(65535);
 }
 
 void SelectionDAG::DeleteNode(SDNode *N) {
@@ -240,22 +240,14 @@
 void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
 
   // Remove it from the AllNodes list.
-  for (std::vector<SDNode*>::iterator I = AllNodes.begin(); ; ++I) {
-    assert(I != AllNodes.end() && "Node not in AllNodes list??");
-    if (*I == N) {
-      // Erase from the vector, which is not ordered.
-      std::swap(*I, AllNodes.back());
-      AllNodes.pop_back();
-      break;
-    }
-  }
+  AllNodes.remove(N);
     
   // Drop all of the operands and decrement used nodes use counts.
-  while (!N->Operands.empty()) {
-    SDNode *O = N->Operands.back().Val;
-    N->Operands.pop_back();
-    O->removeUser(N);
-  }
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+    I->Val->removeUser(N);
+  delete[] N->OperandList;
+  N->OperandList = 0;
+  N->NumOperands = 0;
   
   delete N;
 }
@@ -312,6 +304,9 @@
   case ISD::ExternalSymbol:
     Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
     break;
+  case ISD::TargetExternalSymbol:
+    Erased = TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
   case ISD::VALUETYPE:
     Erased = ValueTypeNodes[cast<VTSDNode>(N)->getVT()] != 0;
     ValueTypeNodes[cast<VTSDNode>(N)->getVT()] = 0;
@@ -419,14 +414,18 @@
     AN = N;
   }
   return 0;
-  
 }
 
 
 
 SelectionDAG::~SelectionDAG() {
-  for (unsigned i = 0, e = AllNodes.size(); i != e; ++i)
-    delete AllNodes[i];
+  while (!AllNodes.empty()) {
+    SDNode *N = AllNodes.begin();
+    delete [] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+    AllNodes.pop_front();
+  }
 }
 
 SDOperand SelectionDAG::getZeroExtendInReg(SDOperand Op, MVT::ValueType VT) {
@@ -551,7 +550,15 @@
 SDOperand SelectionDAG::getExternalSymbol(const char *Sym, MVT::ValueType VT) {
   SDNode *&N = ExternalSymbols[Sym];
   if (N) return SDOperand(N, 0);
-  N = new ExternalSymbolSDNode(Sym, VT);
+  N = new ExternalSymbolSDNode(false, Sym, VT);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT::ValueType VT) {
+  SDNode *&N = TargetExternalSymbols[Sym];
+  if (N) return SDOperand(N, 0);
+  N = new ExternalSymbolSDNode(true, Sym, VT);
   AllNodes.push_back(N);
   return SDOperand(N, 0);
 }
@@ -1065,9 +1072,9 @@
   assert((getOpcode() == ISD::CALLSEQ_START ||
           getOpcode() == ISD::CALLSEQ_END) && "Cannot adjust this node!");
 
-  Operands[0].Val->removeUser(this);
-  Operands[0] = N;
-  N.Val->Uses.push_back(this);
+  OperandList[0].Val->removeUser(this);
+  OperandList[0] = N;
+  OperandList[0].Val->Uses.push_back(this);
 }
 
 
@@ -1080,7 +1087,7 @@
   N = new SDNode(ISD::LOAD, Chain, Ptr, SV);
 
   // Loads have a token chain.
-  N->setValueTypes(VT, MVT::Other);
+  setNodeValueTypes(N, VT, MVT::Other);
   AllNodes.push_back(N);
   return SDOperand(N, 0);
 }
@@ -1198,7 +1205,7 @@
   case 3: return getNode(Opcode, VT, Ops[0], Ops[1], Ops[2]);
   default: break;
   }
-
+  
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Ops[1].Val);
   switch (Opcode) {
   default: break;
@@ -1324,11 +1331,46 @@
   } else {
     N = new SDNode(Opcode, Ops);
   }
-  N->setValueTypes(ResultTys);
+  setNodeValueTypes(N, ResultTys);
   AllNodes.push_back(N);
   return SDOperand(N, 0);
 }
 
+void SelectionDAG::setNodeValueTypes(SDNode *N, 
+                                     std::vector<MVT::ValueType> &RetVals) {
+  switch (RetVals.size()) {  // Zero, one and two result types are special-cased.
+  case 0: return;
+  case 1: N->setValueTypes(RetVals[0]); return;
+  case 2: setNodeValueTypes(N, RetVals[0], RetVals[1]); return;
+  default: break;
+  }
+  // Three or more types: reuse an equal vector from VTList, adding it if absent.
+  std::list<std::vector<MVT::ValueType> >::iterator I =
+    std::find(VTList.begin(), VTList.end(), RetVals);
+  if (I == VTList.end()) {
+    VTList.push_front(RetVals);
+    I = VTList.begin();
+  }
+  // N is given a pointer into the vector held by VTList, not into RetVals.
+  N->setValueTypes(&(*I)[0], I->size());
+}
+
+void SelectionDAG::setNodeValueTypes(SDNode *N, MVT::ValueType VT1, 
+                                     MVT::ValueType VT2) {
+  for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+       E = VTList.end(); I != E; ++I) {
+    if (I->size() == 2 && (*I)[0] == VT1 && (*I)[1] == VT2) {  // Already stored?
+      N->setValueTypes(&(*I)[0], 2);  // Share the stored pair.
+      return;
+    }
+  }
+  std::vector<MVT::ValueType> V;  // Not found: build the pair and store it.
+  V.push_back(VT1);
+  V.push_back(VT2);
+  VTList.push_front(V);
+  N->setValueTypes(&(*VTList.begin())[0], 2);  // Point at VTList's copy, not V.
+}
+
 
 /// SelectNodeTo - These are used for target selectors to *mutate* the
 /// specified node to have the specified return type, Target opcode, and
@@ -1360,7 +1402,7 @@
                                 SDOperand Op1, SDOperand Op2) {
   RemoveNodeFromCSEMaps(N);
   N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc);
-  N->setValueTypes(VT1, VT2);
+  setNodeValueTypes(N, VT1, VT2);
   N->setOperands(Op1, Op2);
 }
 void SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
@@ -1376,7 +1418,7 @@
                                 SDOperand Op1, SDOperand Op2, SDOperand Op3) {
   RemoveNodeFromCSEMaps(N);
   N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc);
-  N->setValueTypes(VT1, VT2);
+  setNodeValueTypes(N, VT1, VT2);
   N->setOperands(Op1, Op2, Op3);
 }
 
@@ -1417,10 +1459,11 @@
     // This node is about to morph, remove its old self from the CSE maps.
     RemoveNodeFromCSEMaps(U);
     
-    for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
-      if (U->getOperand(i).Val == From) {
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
         From->removeUser(U);
-        U->Operands[i].Val = To;
+        I->Val = To;
         To->addUser(U);
       }
 
@@ -1458,10 +1501,11 @@
     // This node is about to morph, remove its old self from the CSE maps.
     RemoveNodeFromCSEMaps(U);
     
-    for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
-      if (U->getOperand(i).Val == From) {
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
         From->removeUser(U);
-        U->Operands[i].Val = To;
+        I->Val = To;
         To->addUser(U);
       }
         
@@ -1499,11 +1543,12 @@
     // This node is about to morph, remove its old self from the CSE maps.
     RemoveNodeFromCSEMaps(U);
     
-    for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
-      if (U->getOperand(i).Val == From) {
-        const SDOperand &ToOp = To[U->getOperand(i).ResNo];
+    for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+         I != E; ++I)
+      if (I->Val == From) {
+        const SDOperand &ToOp = To[I->ResNo];
         From->removeUser(U);
-        U->Operands[i] = ToOp;
+        *I = ToOp;
         ToOp.Val->addUser(U);
       }
         
@@ -1523,6 +1568,15 @@
 //                              SDNode Class
 //===----------------------------------------------------------------------===//
 
+
+/// getValueTypeList - Return a pointer to a static array slot that holds VT.
+/// Each call stores VT into VTs[VT] and then returns the address of that slot.
+MVT::ValueType *SDNode::getValueTypeList(MVT::ValueType VT) {
+  static MVT::ValueType VTs[MVT::LAST_VALUETYPE];
+  VTs[VT] = VT;
+  return &VTs[VT];
+}
+
 /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
 /// indicated value.  This method ignores uses of other values defined by this
 /// operation.
@@ -1570,6 +1624,7 @@
     }
    
   case ISD::PCMARKER:      return "PCMarker";
+  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
   case ISD::SRCVALUE:      return "SrcValue";
   case ISD::VALUETYPE:     return "ValueType";
   case ISD::EntryToken:    return "EntryToken";
@@ -1586,6 +1641,7 @@
   case ISD::BasicBlock:    return "BasicBlock";
   case ISD::Register:      return "Register";
   case ISD::ExternalSymbol: return "ExternalSymbol";
+  case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
   case ISD::ConstantPool:  return "ConstantPool";
   case ISD::TargetConstantPool:  return "TargetConstantPool";
   case ISD::CopyToReg:     return "CopyToReg";
@@ -1771,7 +1827,7 @@
   }
 }
 
-static void DumpNodes(SDNode *N, unsigned indent, const SelectionDAG *G) {
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
     if (N->getOperand(i).Val->hasOneUse())
       DumpNodes(N->getOperand(i).Val, indent+2, G);
@@ -1786,7 +1842,11 @@
 
 void SelectionDAG::dump() const {
   std::cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
-  std::vector<SDNode*> Nodes(AllNodes);
+  std::vector<const SDNode*> Nodes;
+  for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+       I != E; ++I)
+    Nodes.push_back(I);
+  
   std::sort(Nodes.begin(), Nodes.end());
 
   for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {


Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.88.2.1 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.88.2.2
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.88.2.1	Tue Oct 18 14:21:57 2005
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp	Wed Nov 16 12:32:15 2005
@@ -32,6 +32,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Debug.h"
 #include <map>
 #include <iostream>
@@ -45,7 +46,6 @@
 static const bool ViewDAGs = 0;
 #endif
 
-
 namespace llvm {
   //===--------------------------------------------------------------------===//
   /// FunctionLoweringInfo - This contains information that is global to a
@@ -72,14 +72,6 @@
     /// anywhere in the function.
     std::map<const AllocaInst*, int> StaticAllocaMap;
 
-    /// BlockLocalArguments - If any arguments are only used in a single basic
-    /// block, and if the target can access the arguments without side-effects,
-    /// avoid emitting CopyToReg nodes for those arguments.  This map keeps
-    /// track of which arguments are local to each BB.
-    std::multimap<BasicBlock*, std::pair<Argument*,
-                                         unsigned> > BlockLocalArguments;
-
-
     unsigned MakeReg(MVT::ValueType VT) {
       return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
     }
@@ -125,24 +117,39 @@
   return false;
 }
 
+/// isOnlyUsedInEntryBlock - Return true if every use of argument A is in the
+/// entry block of its function; an argument with no uses also returns true.
+static bool isOnlyUsedInEntryBlock(Argument *A) {
+  BasicBlock *Entry = A->getParent()->begin();
+  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+    if (cast<Instruction>(*UI)->getParent() != Entry)
+      return false;  // Use not in entry block.
+  return true;
+}
+
 FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
                                            Function &fn, MachineFunction &mf)
     : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {
 
-  // Initialize the mapping of values to registers.  This is only set up for
-  // instruction values that are used outside of the block that defines
-  // them.
+  // Create a vreg for each argument register that is not dead and is used
+  // outside of the entry block for the function.
   for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
        AI != E; ++AI)
-    InitializeRegForValue(AI);
+    if (!isOnlyUsedInEntryBlock(AI))
+      InitializeRegForValue(AI);
 
+  // Initialize the mapping of values to registers.  This is only set up for
+  // instruction values that are used outside of the block that defines
+  // them.
   Function::iterator BB = Fn.begin(), EB = Fn.end();
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
       if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(AI->getArraySize())) {
         const Type *Ty = AI->getAllocatedType();
         uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
-        unsigned Align = TLI.getTargetData().getTypeAlignment(Ty);
+        unsigned Align = 
+          std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
+                   AI->getAlignment());
 
         // If the alignment of the value is smaller than the size of the value,
         // and if the size of the value is particularly small (<= 8 bytes),
@@ -151,9 +158,8 @@
         // FIXME: This could be made better with a preferred alignment hook in
         // TargetData.  It serves primarily to 8-byte align doubles for X86.
         if (Align < TySize && TySize <= 8) Align = TySize;
-
-        if (CUI->getValue())           // Don't produce zero sized stack objects
-          TySize *= CUI->getValue();   // Get total allocated size.
+        TySize *= CUI->getValue();   // Get total allocated size.
+        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
         StaticAllocaMap[AI] =
           MF.getFrameInfo()->CreateStackObject((unsigned)TySize, Align);
       }
@@ -399,6 +405,7 @@
   void visitStore(StoreInst &I);
   void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
   void visitCall(CallInst &I);
+  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
 
   void visitVAStart(CallInst &I);
   void visitVAArg(VAArgInst &I);
@@ -464,8 +471,8 @@
   case MVT::f64:
     break; // No extension needed!
   }
-
-  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot(), Op1));
+  // Allow targets to lower this further to meet ABI requirements
+  DAG.setRoot(TLI.LowerReturnTo(getRoot(), Op1, DAG));
 }
 
 void SelectionDAGLowering::visitBr(BranchInst &I) {
@@ -614,24 +621,47 @@
       Ty = StTy->getElementType(Field);
     } else {
       Ty = cast<SequentialType>(Ty)->getElementType();
-      if (!isa<Constant>(Idx) || !cast<Constant>(Idx)->isNullValue()) {
-        // N = N + Idx * ElementSize;
-        uint64_t ElementSize = TD.getTypeSize(Ty);
-        SDOperand IdxN = getValue(Idx), Scale = getIntPtrConstant(ElementSize);
-
-        // If the index is smaller or larger than intptr_t, truncate or extend
-        // it.
-        if (IdxN.getValueType() < Scale.getValueType()) {
-          if (Idx->getType()->isSigned())
-            IdxN = DAG.getNode(ISD::SIGN_EXTEND, Scale.getValueType(), IdxN);
-          else
-            IdxN = DAG.getNode(ISD::ZERO_EXTEND, Scale.getValueType(), IdxN);
-        } else if (IdxN.getValueType() > Scale.getValueType())
-          IdxN = DAG.getNode(ISD::TRUNCATE, Scale.getValueType(), IdxN);
 
-        IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getRawValue() == 0) continue;
+
+        uint64_t Offs;
+        if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
+          Offs = (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
+        else
+          Offs = TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
+        continue;
+      }
+      
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD.getTypeSize(Ty);
+      SDOperand IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      if (IdxN.getValueType() < N.getValueType()) {
+        if (Idx->getType()->isSigned())
+          IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
+        else
+          IdxN = DAG.getNode(ISD::ZERO_EXTEND, N.getValueType(), IdxN);
+      } else if (IdxN.getValueType() > N.getValueType())
+        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
+      if (isPowerOf2_64(ElementSize)) {
+        unsigned Amt = Log2_64(ElementSize);
+        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
+                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
         N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+        continue;
       }
+      
+      SDOperand Scale = getIntPtrConstant(ElementSize);
+      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
     }
   }
   setValue(&I, N);
@@ -645,7 +675,8 @@
 
   const Type *Ty = I.getAllocatedType();
   uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
-  unsigned Align = TLI.getTargetData().getTypeAlignment(Ty);
+  unsigned Align = std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
+                            I.getAlignment());
 
   SDOperand AllocSize = getValue(I.getArraySize());
   MVT::ValueType IntPtr = TLI.getPointerTy();
@@ -719,123 +750,144 @@
                           DAG.getSrcValue(I.getOperand(1))));
 }
 
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it here and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+  switch (Intrinsic) {
+  case Intrinsic::vastart:  visitVAStart(I); return 0;
+  case Intrinsic::vaend:    visitVAEnd(I); return 0;
+  case Intrinsic::vacopy:   visitVACopy(I); return 0;
+  case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return 0;
+  case Intrinsic::frameaddress:  visitFrameReturnAddress(I, true); return 0;
+  case Intrinsic::setjmp:  // Adding 1 to the literal skips its leading '_'.
+    return "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
+    break;  // Not reached; the line above returns.
+  case Intrinsic::longjmp:  // Same pointer-offset trick as for setjmp.
+    return "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
+    break;  // Not reached; the line above returns.
+  case Intrinsic::memcpy:  visitMemIntrinsic(I, ISD::MEMCPY); return 0;
+  case Intrinsic::memset:  visitMemIntrinsic(I, ISD::MEMSET); return 0;
+  case Intrinsic::memmove: visitMemIntrinsic(I, ISD::MEMMOVE); return 0;
+    
+  case Intrinsic::readport:
+  case Intrinsic::readio: {
+    std::vector<MVT::ValueType> VTs;
+    VTs.push_back(TLI.getValueType(I.getType()));
+    VTs.push_back(MVT::Other);
+    std::vector<SDOperand> Ops;
+    Ops.push_back(getRoot());
+    Ops.push_back(getValue(I.getOperand(1)));
+    SDOperand Tmp = DAG.getNode(Intrinsic == Intrinsic::readport ?
+                                ISD::READPORT : ISD::READIO, VTs, Ops);
+    
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));  // Result 1 (MVT::Other) becomes the new root.
+    return 0;
+  }
+  case Intrinsic::writeport:
+  case Intrinsic::writeio:
+    DAG.setRoot(DAG.getNode(Intrinsic == Intrinsic::writeport ?
+                            ISD::WRITEPORT : ISD::WRITEIO, MVT::Other,
+                            getRoot(), getValue(I.getOperand(1)),
+                            getValue(I.getOperand(2))));
+    return 0;
+  case Intrinsic::dbg_stoppoint:
+  case Intrinsic::dbg_region_start:
+  case Intrinsic::dbg_region_end:
+  case Intrinsic::dbg_func_start:
+  case Intrinsic::dbg_declare:  // Nothing is emitted; non-void results get UNDEF.
+    if (I.getType() != Type::VoidTy)
+      setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
+    return 0;
+    
+  case Intrinsic::isunordered:
+    setValue(&I, DAG.getSetCC(MVT::i1,getValue(I.getOperand(1)),
+                              getValue(I.getOperand(2)), ISD::SETUO));
+    return 0;
+    
+  case Intrinsic::sqrt:  // Result type is taken from the operand's value type.
+    setValue(&I, DAG.getNode(ISD::FSQRT,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::pcmarker: {
+    SDOperand Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::readcyclecounter: {
+    std::vector<MVT::ValueType> VTs;
+    VTs.push_back(MVT::i64);
+    VTs.push_back(MVT::Other);
+    std::vector<SDOperand> Ops;
+    Ops.push_back(getRoot());
+    SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER, VTs, Ops);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));  // Result 1 (MVT::Other) becomes the new root.
+    return 0;
+  }
+  case Intrinsic::cttz:
+    setValue(&I, DAG.getNode(ISD::CTTZ,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::ctlz:
+    setValue(&I, DAG.getNode(ISD::CTLZ,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::ctpop:
+    setValue(&I, DAG.getNode(ISD::CTPOP,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  default:  // Unhandled intrinsic: print the call, then assert.
+    std::cerr << I;
+    assert(0 && "This intrinsic is not implemented yet!");
+    return 0;
+  }
+}
+
+
 void SelectionDAGLowering::visitCall(CallInst &I) {
   const char *RenameFn = 0;
-  SDOperand Tmp;
-  if (Function *F = I.getCalledFunction())
+  if (Function *F = I.getCalledFunction()) {
     if (F->isExternal())
-      switch (F->getIntrinsicID()) {
-      case 0:     // Not an LLVM intrinsic.
-        if (F->getName() == "fabs" || F->getName() == "fabsf") {
+      if (unsigned IID = F->getIntrinsicID()) {
+        RenameFn = visitIntrinsicCall(I, IID);
+        if (!RenameFn)
+          return;
+      } else {    // Not an LLVM intrinsic.
+        const std::string &Name = F->getName();
+        if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
           if (I.getNumOperands() == 2 &&   // Basic sanity checks.
               I.getOperand(1)->getType()->isFloatingPoint() &&
               I.getType() == I.getOperand(1)->getType()) {
-            Tmp = getValue(I.getOperand(1));
+            SDOperand Tmp = getValue(I.getOperand(1));
             setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
             return;
           }
-        }
-        else if (F->getName() == "sin" || F->getName() == "sinf") {
+        } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
           if (I.getNumOperands() == 2 &&   // Basic sanity checks.
               I.getOperand(1)->getType()->isFloatingPoint() &&
               I.getType() == I.getOperand(1)->getType()) {
-            Tmp = getValue(I.getOperand(1));
+            SDOperand Tmp = getValue(I.getOperand(1));
             setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
             return;
           }
-        }
-        else if (F->getName() == "cos" || F->getName() == "cosf") {
+        } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
           if (I.getNumOperands() == 2 &&   // Basic sanity checks.
               I.getOperand(1)->getType()->isFloatingPoint() &&
               I.getType() == I.getOperand(1)->getType()) {
-            Tmp = getValue(I.getOperand(1));
+            SDOperand Tmp = getValue(I.getOperand(1));
             setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
             return;
           }
         }
-        break;
-      case Intrinsic::vastart:  visitVAStart(I); return;
-      case Intrinsic::vaend:    visitVAEnd(I); return;
-      case Intrinsic::vacopy:   visitVACopy(I); return;
-      case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return;
-      case Intrinsic::frameaddress:  visitFrameReturnAddress(I, true); return;
-
-      case Intrinsic::setjmp:
-        RenameFn = "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
-        break;
-      case Intrinsic::longjmp:
-        RenameFn = "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
-        break;
-      case Intrinsic::memcpy:  visitMemIntrinsic(I, ISD::MEMCPY); return;
-      case Intrinsic::memset:  visitMemIntrinsic(I, ISD::MEMSET); return;
-      case Intrinsic::memmove: visitMemIntrinsic(I, ISD::MEMMOVE); return;
-
-      case Intrinsic::readport:
-      case Intrinsic::readio: {
-        std::vector<MVT::ValueType> VTs;
-        VTs.push_back(TLI.getValueType(I.getType()));
-        VTs.push_back(MVT::Other);
-        std::vector<SDOperand> Ops;
-        Ops.push_back(getRoot());
-        Ops.push_back(getValue(I.getOperand(1)));
-        Tmp = DAG.getNode(F->getIntrinsicID() == Intrinsic::readport ?
-                          ISD::READPORT : ISD::READIO, VTs, Ops);
-
-        setValue(&I, Tmp);
-        DAG.setRoot(Tmp.getValue(1));
-        return;
-      }
-      case Intrinsic::writeport:
-      case Intrinsic::writeio:
-        DAG.setRoot(DAG.getNode(F->getIntrinsicID() == Intrinsic::writeport ?
-                                ISD::WRITEPORT : ISD::WRITEIO, MVT::Other,
-                                getRoot(), getValue(I.getOperand(1)),
-                                getValue(I.getOperand(2))));
-        return;
-      case Intrinsic::dbg_stoppoint:
-      case Intrinsic::dbg_region_start:
-      case Intrinsic::dbg_region_end:
-      case Intrinsic::dbg_func_start:
-      case Intrinsic::dbg_declare:
-        if (I.getType() != Type::VoidTy)
-          setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
-        return;
-
-      case Intrinsic::isunordered:
-        setValue(&I, DAG.getSetCC(MVT::i1,getValue(I.getOperand(1)),
-                                  getValue(I.getOperand(2)), ISD::SETUO));
-        return;
-
-      case Intrinsic::sqrt:
-        setValue(&I, DAG.getNode(ISD::FSQRT,
-                                 getValue(I.getOperand(1)).getValueType(),
-                                 getValue(I.getOperand(1))));
-        return;
-
-      case Intrinsic::pcmarker:
-        Tmp = getValue(I.getOperand(1));
-        DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
-        return;
-      case Intrinsic::cttz:
-        setValue(&I, DAG.getNode(ISD::CTTZ,
-                                 getValue(I.getOperand(1)).getValueType(),
-                                 getValue(I.getOperand(1))));
-        return;
-      case Intrinsic::ctlz:
-        setValue(&I, DAG.getNode(ISD::CTLZ,
-                                 getValue(I.getOperand(1)).getValueType(),
-                                 getValue(I.getOperand(1))));
-        return;
-      case Intrinsic::ctpop:
-        setValue(&I, DAG.getNode(ISD::CTPOP,
-                                 getValue(I.getOperand(1)).getValueType(),
-                                 getValue(I.getOperand(1))));
-        return;
-      default:
-        std::cerr << I;
-        assert(0 && "This intrinsic is not implemented yet!");
-        return;
       }
+  }
 
   SDOperand Callee;
   if (!RenameFn)
@@ -843,7 +895,7 @@
   else
     Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
   std::vector<std::pair<SDOperand, const Type*> > Args;
-
+  Args.reserve(I.getNumOperands());
   for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
     Value *Arg = I.getOperand(i);
     SDOperand ArgNode = getValue(Arg);
@@ -912,6 +964,11 @@
   return 0;  
 }
 
+SDOperand TargetLowering::LowerReturnTo(SDOperand Chain, SDOperand Op,
+                                        SelectionDAG &DAG) {
+  return DAG.getNode(ISD::RET, MVT::Other, Chain, Op);  // Plain ISD::RET node.
+}
+
 SDOperand TargetLowering::LowerVAStart(SDOperand Chain,
                                        SDOperand VAListP, Value *VAListV,
                                        SelectionDAG &DAG) {
@@ -1022,7 +1079,6 @@
   // updates dom and loop info.
 }
 
-
 bool SelectionDAGISel::runOnFunction(Function &Fn) {
   MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
   RegMap = MF.getSSARegMap();
@@ -1039,7 +1095,7 @@
         if (isa<Constant>(PN->getIncomingValue(i)))
           SplitCriticalEdge(PN->getIncomingBlock(i), BB);
   }
-
+  
   FunctionLoweringInfo FuncInfo(TLI, Fn, MF);
 
   for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
@@ -1081,104 +1137,45 @@
   }
 }
 
-/// IsOnlyUsedInOneBasicBlock - If the specified argument is only used in a
-/// single basic block, return that block.  Otherwise, return a null pointer.
-static BasicBlock *IsOnlyUsedInOneBasicBlock(Argument *A) {
-  if (A->use_empty()) return 0;
-  BasicBlock *BB = cast<Instruction>(A->use_back())->getParent();
-  for (Argument::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E;
-       ++UI)
-    if (isa<PHINode>(*UI) || cast<Instruction>(*UI)->getParent() != BB)
-      return 0;  // Disagreement among the users?
-
-  // Okay, there is a single BB user.  Only permit this optimization if this is
-  // the entry block, otherwise, we might sink argument loads into loops and
-  // stuff.  Later, when we have global instruction selection, this won't be an
-  // issue clearly.
-  if (BB == BB->getParent()->begin())
-    return BB;
-  return 0;
-}
-
 void SelectionDAGISel::
 LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL,
                std::vector<SDOperand> &UnorderedChains) {
   // If this is the entry block, emit arguments.
   Function &F = *BB->getParent();
   FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+  SDOperand OldRoot = SDL.DAG.getRoot();  // NOTE(review): not read by added lines.
+  std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
 
-  if (BB == &F.front()) {
-    SDOperand OldRoot = SDL.DAG.getRoot();
-
-    std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
-
-    // If there were side effects accessing the argument list, do not do
-    // anything special.
-    if (OldRoot != SDL.DAG.getRoot()) {
-      unsigned a = 0;
-      for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
-           AI != E; ++AI,++a)
-        if (!AI->use_empty()) {
-          SDL.setValue(AI, Args[a]);
-          
-          SDOperand Copy =
-            CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
-          UnorderedChains.push_back(Copy);
-        }
-    } else {
-      // Otherwise, if any argument is only accessed in a single basic block,
-      // emit that argument only to that basic block.
-      unsigned a = 0;
-      for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
-           AI != E; ++AI,++a)
-        if (!AI->use_empty()) {
-          if (BasicBlock *BBU = IsOnlyUsedInOneBasicBlock(AI)) {
-            FuncInfo.BlockLocalArguments.insert(std::make_pair(BBU,
-                                                      std::make_pair(AI, a)));
-          } else {
-            SDL.setValue(AI, Args[a]);
-            SDOperand Copy =
-              CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
-            UnorderedChains.push_back(Copy);
-          }
-        }
-    }
-
-    // Next, if the function has live ins that need to be copied into vregs,
-    // emit the copies now, into the top of the block.
-    MachineFunction &MF = SDL.DAG.getMachineFunction();
-    if (MF.livein_begin() != MF.livein_end()) {
-      SSARegMap *RegMap = MF.getSSARegMap();
-      const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
-      for (MachineFunction::livein_iterator LI = MF.livein_begin(),
-           E = MF.livein_end(); LI != E; ++LI)
-        if (LI->second)
-          MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
-                           LI->first, RegMap->getRegClass(LI->second));
-    }
+  unsigned a = 0;  // Index into Args, advanced in step with AI.
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI, ++a)
+    if (!AI->use_empty()) {
+      SDL.setValue(AI, Args[a]);
       
-    // Finally, if the target has anything special to do, allow it to do so.
-    EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
-  }
-
-  // See if there are any block-local arguments that need to be emitted in this
-  // block.
-
-  if (!FuncInfo.BlockLocalArguments.empty()) {
-    std::multimap<BasicBlock*, std::pair<Argument*, unsigned> >::iterator BLAI =
-      FuncInfo.BlockLocalArguments.lower_bound(BB);
-    if (BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB) {
-      // Lower the arguments into this block.
-      std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
-
-      // Set up the value mapping for the local arguments.
-      for (; BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB;
-           ++BLAI)
-        SDL.setValue(BLAI->second.first, Args[BLAI->second.second]);
-
-      // Any dead arguments will just be ignored here.
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BB's will reference it as.
+      if (FuncInfo.ValueMap.count(AI)) {
+        SDOperand Copy =
+          CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
+        UnorderedChains.push_back(Copy);
+      }
     }
+
+  // Next, if the function has live ins that need to be copied into vregs,
+  // emit the copies now, into the top of the block.
+  MachineFunction &MF = SDL.DAG.getMachineFunction();
+  if (MF.livein_begin() != MF.livein_end()) {
+    SSARegMap *RegMap = MF.getSSARegMap();
+    const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
+    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+         E = MF.livein_end(); LI != E; ++LI)
+      if (LI->second)
+        MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+                         LI->first, RegMap->getRegClass(LI->second));
   }
+    
+  // Finally, if the target has anything special to do, allow it to do so.
+  EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
 }
 
 
@@ -1189,8 +1186,9 @@
 
   std::vector<SDOperand> UnorderedChains;
 
-  // Lower any arguments needed in this block.
-  LowerArguments(LLVMBB, SDL, UnorderedChains);
+  // Lower any arguments needed in this block if this is the entry block.
+  if (LLVMBB == &LLVMBB->getParent()->front())
+    LowerArguments(LLVMBB, SDL, UnorderedChains);
 
   BB = FuncInfo.MBBMap[LLVMBB];
   SDL.setCurrentBasicBlock(BB);
@@ -1269,7 +1267,18 @@
 
   // Turn all of the unordered chains into one factored node.
   if (!UnorderedChains.empty()) {
-    UnorderedChains.push_back(SDL.getRoot());
+    SDOperand Root = SDL.getRoot();
+    if (Root.getOpcode() != ISD::EntryToken) {
+      unsigned i = 0, e = UnorderedChains.size();
+      for (; i != e; ++i) {
+        assert(UnorderedChains[i].Val->getNumOperands() > 1);
+        if (UnorderedChains[i].Val->getOperand(0) == Root)
+          break;  // Don't add the root if we already indirectly depend on it.
+      }
+        
+      if (i == e)
+        UnorderedChains.push_back(Root);
+    }
     DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, UnorderedChains));
   }
 






More information about the llvm-commits mailing list