[llvm-commits] [vector_llvm] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp LegalizeDAG.cpp ScheduleDAG.cpp SelectionDAG.cpp SelectionDAGISel.cpp
Robert Bocchino
bocchino at cs.uiuc.edu
Wed Nov 16 10:32:27 PST 2005
Changes in directory llvm/lib/CodeGen/SelectionDAG:
DAGCombiner.cpp updated: 1.47 -> 1.47.2.1
LegalizeDAG.cpp updated: 1.201 -> 1.201.2.1
ScheduleDAG.cpp updated: 1.37 -> 1.37.2.1
SelectionDAG.cpp updated: 1.206 -> 1.206.2.1
SelectionDAGISel.cpp updated: 1.88.2.1 -> 1.88.2.2
---
Log message:
Merged mainline into Vector LLVM branch
---
Diffs of the changes: (+1099 -552)
DAGCombiner.cpp | 357 ++++++++++++++++++++++++++++++++-
LegalizeDAG.cpp | 85 +++++--
ScheduleDAG.cpp | 544 ++++++++++++++++++++++++++++++---------------------
SelectionDAG.cpp | 210 ++++++++++++-------
SelectionDAGISel.cpp | 455 +++++++++++++++++++++---------------------
5 files changed, 1099 insertions(+), 552 deletions(-)
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.47 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.47.2.1
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.47 Tue Oct 18 01:04:22 2005
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Nov 16 12:32:15 2005
@@ -180,6 +180,9 @@
SDOperand N3, ISD::CondCode CC);
SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
ISD::CondCode Cond, bool foldBooleans = true);
+
+ SDOperand BuildSDIV(SDNode *N);
+ SDOperand BuildUDIV(SDNode *N);
public:
DAGCombiner(SelectionDAG &D)
: DAG(D), TLI(D.getTargetLoweringInfo()), AfterLegalize(false) {}
@@ -189,6 +192,178 @@
};
}
+struct ms {
+ int64_t m; // magic multiplier for signed division (filled in by magic32/magic64)
+ int64_t s; // right-shift amount to apply after the high multiply
+};
+
+struct mu {
+ uint64_t m; // magic multiplier for unsigned division (filled in by magicu32/magicu64)
+ int64_t a; // "add" indicator: 0 or 1; when 1 BuildUDIV emits its longer fixup sequence
+ int64_t s; // right-shift amount to apply after the high multiply
+};
+
+/// magic32 - calculate the magic numbers required to codegen a 32-bit integer
+/// sdiv as a sequence of multiply and shifts. Requires that the divisor not be
+/// 0, 1, or -1. Returns the multiplier in 'm' (sign extended) and the shift in 's'.
+static ms magic32(int32_t d) {
+ int32_t p;
+ uint32_t ad, anc, delta, q1, r1, q2, r2, t;
+ const uint32_t two31 = 0x80000000U;
+ struct ms mag;
+
+ ad = d < 0 ? 0U - (uint32_t)d : (uint32_t)d; // |d| in unsigned math; abs(INT32_MIN) is undefined
+ t = two31 + ((uint32_t)d >> 31);
+ anc = t - 1 - t%ad; // absolute value of nc
+ p = 31; // initialize p
+ q1 = two31/anc; // initialize q1 = 2**p/abs(nc)
+ r1 = two31 - q1*anc; // initialize r1 = rem(2**p, abs(nc))
+ q2 = two31/ad; // initialize q2 = 2**p/abs(d)
+ r2 = two31 - q2*ad; // initialize r2 = rem(2**p, abs(d))
+ do {
+ p = p + 1;
+ q1 = 2*q1; // update q1 = 2**p/abs(nc)
+ r1 = 2*r1; // update r1 = rem(2**p, abs(nc))
+ if (r1 >= anc) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = 2*q2; // update q2 = 2**p/abs(d)
+ r2 = 2*r2; // update r2 = rem(2**p, abs(d))
+ if (r2 >= ad) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ mag.m = (int32_t)(q2 + 1); // make sure to sign extend
+ if (d < 0) mag.m = -mag.m; // resulting magic number
+ mag.s = p - 32; // resulting shift
+ return mag;
+}
+
+/// magicu32 - calculate the magic numbers required to codegen a 32-bit integer
+/// udiv as a sequence of multiply, add and shifts. Requires that the divisor not be 0.
+static mu magicu32(uint32_t d) {
+ int32_t p;
+ uint32_t nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ nc = - 1 - (-d)%d;
+ p = 31; // initialize p
+ q1 = 0x80000000/nc; // initialize q1 = 2**p/nc
+ r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2**p, nc)
+ q2 = 0x7FFFFFFF/d; // initialize q2 = (2**p - 1)/d
+ r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem(2**p - 1, d)
+ do {
+ p = p + 1;
+ if (r1 >= nc - r1 ) {
+ q1 = 2*q1 + 1; // update q1
+ r1 = 2*r1 - nc; // update r1
+ }
+ else {
+ q1 = 2*q1; // update q1
+ r1 = 2*r1; // update r1
+ }
+ if (r2 + 1 >= d - r2) {
+ if (q2 >= 0x7FFFFFFF) magu.a = 1;
+ q2 = 2*q2 + 1; // update q2
+ r2 = 2*r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2 >= 0x80000000) magu.a = 1;
+ q2 = 2*q2; // update q2
+ r2 = 2*r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - 32; // resulting shift
+ return magu;
+}
+
+/// magic64 - calculate the magic numbers required to codegen a 64-bit integer
+/// sdiv as a sequence of multiply and shifts. Requires that the divisor not be
+/// 0, 1, or -1. Returns the multiplier in 'm' and the post-multiply shift in 's'.
+static ms magic64(int64_t d) {
+ int64_t p;
+ uint64_t ad, anc, delta, q1, r1, q2, r2, t;
+ const uint64_t two63 = 9223372036854775808ULL; // 2^63
+ struct ms mag;
+
+ ad = d < 0 ? 0ULL - (uint64_t)d : (uint64_t)d; // |d| in unsigned math; -INT64_MIN overflows
+ t = two63 + ((uint64_t)d >> 63);
+ anc = t - 1 - t%ad; // absolute value of nc
+ p = 63; // initialize p
+ q1 = two63/anc; // initialize q1 = 2**p/abs(nc)
+ r1 = two63 - q1*anc; // initialize r1 = rem(2**p, abs(nc))
+ q2 = two63/ad; // initialize q2 = 2**p/abs(d)
+ r2 = two63 - q2*ad; // initialize r2 = rem(2**p, abs(d))
+ do {
+ p = p + 1;
+ q1 = 2*q1; // update q1 = 2**p/abs(nc)
+ r1 = 2*r1; // update r1 = rem(2**p, abs(nc))
+ if (r1 >= anc) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = 2*q2; // update q2 = 2**p/abs(d)
+ r2 = 2*r2; // update r2 = rem(2**p, abs(d))
+ if (r2 >= ad) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d < 0) mag.m = -mag.m; // resulting magic number
+ mag.s = p - 64; // resulting shift
+ return mag;
+}
+
+/// magicu64 - calculate the magic numbers required to codegen a 64-bit integer
+/// udiv as a sequence of multiply, add and shifts. Requires that the divisor not be 0.
+static mu magicu64(uint64_t d)
+{
+ int64_t p;
+ uint64_t nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ nc = - 1 - (-d)%d;
+ p = 63; // initialize p
+ q1 = 0x8000000000000000ull/nc; // initialize q1 = 2**p/nc
+ r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2**p, nc)
+ q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2**p - 1)/d
+ r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem(2**p - 1, d)
+ do {
+ p = p + 1;
+ if (r1 >= nc - r1 ) {
+ q1 = 2*q1 + 1; // update q1
+ r1 = 2*r1 - nc; // update r1
+ }
+ else {
+ q1 = 2*q1; // update q1
+ r1 = 2*r1; // update r1
+ }
+ if (r2 + 1 >= d - r2) {
+ if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
+ q2 = 2*q2 + 1; // update q2
+ r2 = 2*r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2 >= 0x8000000000000000ull) magu.a = 1;
+ q2 = 2*q2; // update q2
+ r2 = 2*r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ // p starts at 63, so the cap is twice the operand width (128), mirroring
+ // the 31 -> 64 range of the 32-bit routine; a cap of 64 stops after one pass.
+ } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - 64; // resulting shift
+ return magu;
+}
+
/// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Op and Mask are known to
/// be the same type.
@@ -209,7 +384,7 @@
return (Mask & ((1ULL << SrcBits)-1)) == 0; // Returning only the zext bits.
case ISD::ZERO_EXTEND:
SrcBits = MVT::getSizeInBits(Op.getOperand(0).getValueType());
- return MaskedValueIsZero(Op.getOperand(0),Mask & ((1ULL << SrcBits)-1),TLI);
+ return MaskedValueIsZero(Op.getOperand(0),Mask & (~0ULL >> (64-SrcBits)),TLI);
case ISD::AssertZext:
SrcBits = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
return (Mask & ((1ULL << SrcBits)-1)) == 0; // Returning only the zext bits.
@@ -339,7 +514,9 @@
AfterLegalize = RunningAfterLegalize;
// Add all the dag nodes to the worklist.
- WorkList.insert(WorkList.end(), DAG.allnodes_begin(), DAG.allnodes_end());
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -560,8 +737,7 @@
// fold (mul c1, c2) -> c1*c2
if (N0C && N1C)
- return DAG.getConstant(N0C->getValue() * N1C->getValue(),
- N->getValueType(0));
+ return DAG.getConstant(N0C->getValue() * N1C->getValue(), VT);
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::MUL, VT, N1, N0);
@@ -570,13 +746,23 @@
return N1;
// fold (mul x, -1) -> 0-x
if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, N->getValueType(0),
- DAG.getConstant(0, N->getValueType(0)), N0);
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (N1C && isPowerOf2_64(N1C->getValue()))
- return DAG.getNode(ISD::SHL, N->getValueType(0), N0,
+ return DAG.getNode(ISD::SHL, VT, N0,
DAG.getConstant(Log2_64(N1C->getValue()),
TLI.getShiftAmountTy()));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && isPowerOf2_64(-N1C->getSignExtended())) {
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, VT, N0,
+ DAG.getConstant(Log2_64(-N1C->getSignExtended()),
+ TLI.getShiftAmountTy())));
+ }
+
+
// fold (mul (mul x, c1), c2) -> (mul x, c1*c2)
if (N1C && N0.getOpcode() == ISD::MUL) {
ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
@@ -602,18 +788,58 @@
if (N0C && N1C && !N1C->isNullValue())
return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(),
N->getValueType(0));
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSignExtended() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
if (MaskedValueIsZero(N1, SignBit, TLI) &&
MaskedValueIsZero(N0, SignBit, TLI))
return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+ // fold (sdiv X, pow2) -> (add (sra X, log(pow2)), (srl X, sizeof(X)-1))
+ if (N1C && N1C->getValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSignExtended()) ||
+ isPowerOf2_64(-N1C->getSignExtended()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDOperand();
+ int64_t pow2 = N1C->getSignExtended();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0,
+ DAG.getConstant(MVT::getSizeInBits(VT)-1,
+ TLI.getShiftAmountTy()));
+ WorkList.push_back(SRL.Val);
+ SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL);
+ WorkList.push_back(SGN.Val);
+ SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN,
+ DAG.getConstant(Log2_64(abs2),
+ TLI.getShiftAmountTy()));
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+ WorkList.push_back(SRA.Val);
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+ }
+ // if integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDOperand Op = BuildSDIV(N);
+ if (Op.Val) return Op;
+ }
return SDOperand();
}
SDOperand DAGCombiner::visitUDIV(SDNode *N) {
SDOperand N0 = N->getOperand(0);
SDOperand N1 = N->getOperand(1);
+ MVT::ValueType VT = N->getValueType(0);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
@@ -626,6 +852,12 @@
return DAG.getNode(ISD::SRL, N->getValueType(0), N0,
DAG.getConstant(Log2_64(N1C->getValue()),
TLI.getShiftAmountTy()));
+ // fold (udiv x, c) -> alternate
+ if (N1C && N1C->getValue() && !TLI.isIntDivCheap()) {
+ SDOperand Op = BuildUDIV(N);
+ if (Op.Val) return Op;
+ }
+
return SDOperand();
}
@@ -733,14 +965,14 @@
DAG.getConstant(N1C->getValue()&N01C->getValue(), VT));
}
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
- if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ if (N1C && N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
unsigned ExtendBits =
- MVT::getSizeInBits(cast<VTSDNode>(N0.getOperand(1))->getVT());
- if ((N1C->getValue() & (~0ULL << ExtendBits)) == 0)
+ MVT::getSizeInBits(cast<VTSDNode>(N0.getOperand(1))->getVT());
+ if (ExtendBits == 64 || ((N1C->getValue() & (~0ULL << ExtendBits)) == 0))
return DAG.getNode(ISD::AND, VT, N0.getOperand(0), N1);
}
// fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
- if (N0.getOpcode() == ISD::OR && N1C)
+ if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if ((ORI->getValue() & N1C->getValue()) == N1C->getValue())
return N1;
@@ -801,7 +1033,7 @@
return DAG.getNode(N0.getOpcode(), VT, ANDNode, N0.getOperand(1));
}
// fold (and (sra)) -> (and (srl)) when possible.
- if (N0.getOpcode() == ISD::SRA && N0.Val->hasOneUse())
+ if (N0.getOpcode() == ISD::SRA && N0.Val->hasOneUse()) {
if (ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// If the RHS of the AND has zeros where the sign bits of the SRA will
// land, turn the SRA into an SRL.
@@ -813,7 +1045,7 @@
return SDOperand();
}
}
-
+ }
// fold (zext_inreg (extload x)) -> (zextload x)
if (N0.getOpcode() == ISD::EXTLOAD) {
MVT::ValueType EVT = cast<VTSDNode>(N0.getOperand(3))->getVT();
@@ -883,7 +1115,16 @@
if (N01C)
return DAG.getNode(ISD::OR, VT, N0.getOperand(0),
DAG.getConstant(N1C->getValue()|N01C->getValue(), VT));
+ } else if (N1C && N0.getOpcode() == ISD::AND && N0.Val->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ return DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::OR, VT, N0.getOperand(0),
+ N1),
+ DAG.getConstant(N1C->getValue() | C1->getValue(), VT));
}
+
+
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -1747,7 +1988,10 @@
// If this is a store that kills a previous store, remove the previous store.
if (Chain.getOpcode() == ISD::STORE && Chain.getOperand(2) == Ptr &&
- Chain.Val->hasOneUse() /* Avoid introducing DAG cycles */) {
+ Chain.Val->hasOneUse() /* Avoid introducing DAG cycles */ &&
+ // Make sure that these stores are the same value type:
+ // FIXME: we really care that the second store is >= size of the first.
+ Value.getValueType() == Chain.getOperand(1).getValueType()) {
// Create a new store of Value that replaces both stores.
SDNode *PrevStore = Chain.Val;
if (PrevStore->getOperand(1) == Value) // Same value multiply stored.
@@ -2152,6 +2396,9 @@
// Canonicalize setgt X, Min --> setne X, Min
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
// If we have setult X, 1, turn it into seteq X, 0
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
@@ -2249,7 +2496,7 @@
if (N0.getOperand(0) == N1.getOperand(1))
return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond);
if (N0.getOperand(1) == N1.getOperand(0))
- return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond);
}
}
@@ -2356,6 +2603,86 @@
return SDOperand();
}
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. Returns a null SDOperand if it declines. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+ MVT::ValueType VT = N->getValueType(0);
+
+ // Check to see if we can do this.
+ if (!TLI.isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+ return SDOperand(); // BuildSDIV only operates on i32 or i64
+ if (!TLI.isOperationLegal(ISD::MULHS, VT))
+ return SDOperand(); // Make sure the target supports MULHS.
+
+ int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended(); // operand 1 must be a constant
+ ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
+
+ // Multiply the numerator (operand 0) by the magic value
+ SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ // If d > 0 and m < 0, add the numerator
+ if (d > 0 && magics.m < 0) {
+ Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
+ WorkList.push_back(Q.Val);
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d < 0 && magics.m > 0) {
+ Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
+ WorkList.push_back(Q.Val);
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, VT, Q,
+ DAG.getConstant(magics.s, TLI.getShiftAmountTy()));
+ WorkList.push_back(Q.Val);
+ }
+ // Extract the sign bit and add it to the quotient
+ SDOperand T =
+ DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
+ TLI.getShiftAmountTy()));
+ WorkList.push_back(T.Val);
+ return DAG.getNode(ISD::ADD, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. Returns a null SDOperand if it declines. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+ MVT::ValueType VT = N->getValueType(0);
+
+ // Check to see if we can do this.
+ if (!TLI.isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+ return SDOperand(); // BuildUDIV only operates on i32 or i64
+ if (!TLI.isOperationLegal(ISD::MULHU, VT))
+ return SDOperand(); // Make sure the target supports MULHU.
+
+ uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue(); // operand 1 must be a constant
+ mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
+
+ // Multiply the numerator (operand 0) by the magic value
+ SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ WorkList.push_back(Q.Val);
+
+ if (magics.a == 0) { // no "add" fixup: the result is simply Q >> s
+ return DAG.getNode(ISD::SRL, VT, Q,
+ DAG.getConstant(magics.s, TLI.getShiftAmountTy()));
+ } else { // "add" case: result is (((N - Q) >> 1) + Q) >> (s - 1)
+ SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
+ WorkList.push_back(NPQ.Val);
+ NPQ = DAG.getNode(ISD::SRL, VT, NPQ,
+ DAG.getConstant(1, TLI.getShiftAmountTy()));
+ WorkList.push_back(NPQ.Val);
+ NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
+ WorkList.push_back(NPQ.Val);
+ return DAG.getNode(ISD::SRL, VT, NPQ,
+ DAG.getConstant(magics.s-1, TLI.getShiftAmountTy()));
+ }
+}
+
// SelectionDAG::Combine - This is the entry point for the file.
//
void SelectionDAG::Combine(bool RunningAfterLegalize) {
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.201 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.201.2.1
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.201 Mon Oct 17 19:27:41 2005
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Wed Nov 16 12:32:15 2005
@@ -394,7 +394,6 @@
// Now that we have N in, add anything that uses it if all of their operands
// are now done.
-
for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;++UI)
ComputeTopDownOrdering(*UI, Order, Visited);
}
@@ -414,13 +413,15 @@
// entry node) that have no operands.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
- if ((*I)->getNumOperands() == 0) {
- Visited[*I] = 0 - 1U;
- ComputeTopDownOrdering(*I, Order, Visited);
+ if (I->getNumOperands() == 0) {
+ Visited[I] = 0 - 1U;
+ ComputeTopDownOrdering(I, Order, Visited);
}
}
- assert(Order.size() == Visited.size() && Order.size() == DAG.allnodes_size()&&
+ assert(Order.size() == Visited.size() &&
+ Order.size() ==
+ (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) &&
"Error: DAG is cyclic!");
Visited.clear();
@@ -632,19 +633,26 @@
}
break;
}
- case ISD::TokenFactor: {
- std::vector<SDOperand> Ops;
- bool Changed = false;
- // Legalize the operands
- for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
- SDOperand Op = Node->getOperand(i);
- Ops.push_back(LegalizeOp(Op));
- Changed |= Ops[i] != Op;
+ case ISD::TokenFactor:
+ if (Node->getNumOperands() == 2) {
+ bool Changed = false;
+ SDOperand Op0 = LegalizeOp(Node->getOperand(0));
+ SDOperand Op1 = LegalizeOp(Node->getOperand(1));
+ if (Op0 != Node->getOperand(0) || Op1 != Node->getOperand(1))
+ Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Op0, Op1);
+ } else {
+ std::vector<SDOperand> Ops;
+ bool Changed = false;
+ // Legalize the operands.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ SDOperand Op = Node->getOperand(i);
+ Ops.push_back(LegalizeOp(Op));
+ Changed |= Ops[i] != Op;
+ }
+ if (Changed)
+ Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Ops);
}
- if (Changed)
- Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Ops);
break;
- }
case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
@@ -955,14 +963,37 @@
}
assert(0 && "Unreachable");
}
- case ISD::EXTRACT_ELEMENT:
- // Get both the low and high parts.
- ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
- if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
- Result = Tmp2; // 1 -> Hi
- else
- Result = Tmp1; // 0 -> Lo
+ case ISD::EXTRACT_ELEMENT: {
+ MVT::ValueType OpTy = Node->getOperand(0).getValueType();
+ switch (getTypeAction(OpTy)) {
+ default:
+ assert(0 && "EXTRACT_ELEMENT action for type unimplemented!");
+ break;
+ case Legal:
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getValue()) {
+ // 1 -> Hi
+ Result = DAG.getNode(ISD::SRL, OpTy, Node->getOperand(0),
+ DAG.getConstant(MVT::getSizeInBits(OpTy)/2,
+ TLI.getShiftAmountTy()));
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Result);
+ } else {
+ // 0 -> Lo
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Result = LegalizeOp(Result);
+ break;
+ case Expand:
+ // Get both the low and high parts.
+ ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
+ Result = Tmp2; // 1 -> Hi
+ else
+ Result = Tmp1; // 0 -> Lo
+ break;
+ }
break;
+ }
case ISD::CopyToReg:
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
@@ -1091,6 +1122,11 @@
if (Tmp1 != Node->getOperand(0))
Result = DAG.getNode(ISD::PCMARKER, MVT::Other, Tmp1,Node->getOperand(1));
break;
+ case ISD::READCYCLECOUNTER:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain
+ if (Tmp1 != Node->getOperand(0))
+ Result = DAG.getNode(ISD::READCYCLECOUNTER, MVT::i64, Tmp1);
+ break;
case ISD::TRUNCSTORE:
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the pointer.
@@ -2243,7 +2279,8 @@
// legal, such as PowerPC.
if (Node->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
- TLI.isOperationLegal(ISD::FP_TO_SINT, NVT)) {
+ (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
+ TLI.getOperationAction(ISD::FP_TO_SINT, NVT)==TargetLowering::Custom)){
Result = DAG.getNode(ISD::FP_TO_SINT, NVT, Tmp1);
} else {
Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
Index: llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.37 llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.37.2.1
--- llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.37 Thu Oct 13 11:44:00 2005
+++ llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp Wed Nov 16 12:32:15 2005
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under the
+// This file was developed by James M. Laskey and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -32,6 +33,7 @@
enum ScheduleChoices {
noScheduling,
simpleScheduling,
+ simpleNoItinScheduling
};
} // namespace
@@ -43,6 +45,8 @@
"Trivial emission with no analysis"),
clEnumValN(simpleScheduling, "simple",
"Minimize critical path and maximize processor utilization"),
+ clEnumValN(simpleNoItinScheduling, "simple-noitin",
+ "Same as simple except using generic latency"),
clEnumValEnd));
@@ -97,65 +101,74 @@
typedef typename std::vector<T>::iterator Iter;
// Tally iterator
- /// AllInUse - Test to see if all of the resources in the slot are busy (set.)
- inline bool AllInUse(Iter Cursor, unsigned ResourceSet) {
- return (*Cursor & ResourceSet) == ResourceSet;
- }
-
- /// Skip - Skip over slots that use all of the specified resource (all are
- /// set.)
- Iter Skip(Iter Cursor, unsigned ResourceSet) {
- assert(ResourceSet && "At least one resource bit needs to bet set");
-
- // Continue to the end
- while (true) {
- // Break out if one of the resource bits is not set
- if (!AllInUse(Cursor, ResourceSet)) return Cursor;
- // Try next slot
- Cursor++;
- assert(Cursor < Tally.end() && "Tally is not large enough for schedule");
- }
- }
-
- /// FindSlots - Starting from Begin, locate N consecutive slots where at least
- /// one of the resource bits is available. Returns the address of first slot.
- Iter FindSlots(Iter Begin, unsigned N, unsigned ResourceSet,
- unsigned &Resource) {
- // Track position
- Iter Cursor = Begin;
+ /// SlotsAvailable - Returns true if all units are available.
+ ///
+ bool SlotsAvailable(Iter Begin, unsigned N, unsigned ResourceSet,
+ unsigned &Resource) {
+ assert(N && "Must check availability with N != 0");
+ // Determine end of interval
+ Iter End = Begin + N;
+ assert(End <= Tally.end() && "Tally is not large enough for schedule");
- // Try all possible slots forward
- while (true) {
- // Skip full slots
- Cursor = Skip(Cursor, ResourceSet);
- // Determine end of interval
- Iter End = Cursor + N;
- assert(End <= Tally.end() && "Tally is not large enough for schedule");
+ // Iterate thru each resource
+ BitsIterator<T> Resources(ResourceSet & ~*Begin);
+ while (unsigned Res = Resources.Next()) {
+ // Check if resource is available for next N slots
+ Iter Interval = End;
+ do {
+ Interval--;
+ if (*Interval & Res) break;
+ } while (Interval != Begin);
- // Iterate thru each resource
- BitsIterator<T> Resources(ResourceSet & ~*Cursor);
- while (unsigned Res = Resources.Next()) {
- // Check if resource is available for next N slots
- // Break out if resource is busy
- Iter Interval = Cursor;
- for (; Interval < End && !(*Interval & Res); Interval++) {}
-
- // If available for interval, return where and which resource
- if (Interval == End) {
- Resource = Res;
- return Cursor;
- }
- // Otherwise, check if worth checking other resources
- if (AllInUse(Interval, ResourceSet)) {
- // Start looking beyond interval
- Cursor = Interval;
- break;
- }
+ // If available for N
+ if (Interval == Begin) {
+ // Success
+ Resource = Res;
+ return true;
}
- Cursor++;
}
+
+ // No luck
+ Resource = 0;
+ return false;
+ }
+
+ /// RetrySlot - Walk slots [Begin, Begin+N) from last to first, removing busy units from ResourceSet; return the slot just past the one that empties the set, or Begin if units remain.
+ Iter RetrySlot(Iter Begin, unsigned N, unsigned ResourceSet) {
+ assert(N && "Must check availability with N != 0");
+ // Determine end of interval
+ Iter End = Begin + N;
+ assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+ while (Begin != End--) { // compares first, then steps End back onto the slot to examine
+ // Clear units in use
+ ResourceSet &= ~*End;
+ // If no units left then we should go no further
+ if (!ResourceSet) return End + 1;
+ }
+ // Made it all the way through
+ return Begin;
+ }
+
+ /// FindAndReserveStages - Return true if every stage in [Stage, StageEnd) can
+ /// be placed back-to-back starting at Begin. If so mark the chosen units as busy.
+ bool FindAndReserveStages(Iter Begin,
+ InstrStage *Stage, InstrStage *StageEnd) {
+ // If at last stage then we're done
+ if (Stage == StageEnd) return true;
+ // Get number of cycles for current stage
+ unsigned N = Stage->Cycles;
+ // Check to see if N slots are available, if not fail
+ unsigned Resource;
+ if (!SlotsAvailable(Begin, N, Stage->Units, Resource)) return false;
+ // Check to see if remaining stages are available, if not fail
+ if (!FindAndReserveStages(Begin + N, Stage + 1, StageEnd)) return false;
+ // Reserve resource (only reached once all later stages succeeded, so a failure reserves nothing)
+ Reserve(Begin, N, Resource);
+ // Success
+ return true;
+ }
-
+
/// Reserve - Mark busy (set) the specified N slots.
void Reserve(Iter Begin, unsigned N, unsigned Resource) {
// Determine end of interval
@@ -167,24 +180,39 @@
*Begin |= Resource;
}
+ /// FindSlots - Starting from Begin, locate consecutive slots where all stages
+ /// can be completed (reserving them on success). Returns the address of first slot.
+ Iter FindSlots(Iter Begin, InstrStage *StageBegin, InstrStage *StageEnd) {
+ // Track position
+ Iter Cursor = Begin;
+
+ // Try all possible slots forward (the loop only exits by a successful reservation)
+ while (true) {
+ // Try at cursor, if successful return position.
+ if (FindAndReserveStages(Cursor, StageBegin, StageEnd)) return Cursor;
+ // Locate a better position (the retry hint looks at the first stage only)
+ Cursor = RetrySlot(Cursor + 1, StageBegin->Cycles, StageBegin->Units);
+ }
+ }
+
public:
/// Initialize - Resize and zero the tally to the specified number of time
/// slots.
inline void Initialize(unsigned N) {
Tally.assign(N, 0); // Initialize tally to all zeros.
}
-
- // FindAndReserve - Locate and mark busy (set) N bits started at slot I, using
- // ResourceSet for choices.
- unsigned FindAndReserve(unsigned I, unsigned N, unsigned ResourceSet) {
- // Which resource used
- unsigned Resource;
- // Find slots for instruction.
- Iter Where = FindSlots(Tally.begin() + I, N, ResourceSet, Resource);
- // Reserve the slots
- Reserve(Where, N, Resource);
- // Return time slot (index)
- return Where - Tally.begin();
+
+ // FindAndReserve - Locate the first slot at or after Slot where the specified
+ // stages fit, mark it as busy, and return that slot's index in the tally.
+ unsigned FindAndReserve(unsigned Slot, InstrStage *StageBegin,
+ InstrStage *StageEnd) {
+ // Where to begin
+ Iter Begin = Tally.begin() + Slot;
+ // Find a free slot (FindSlots also performs the reservation)
+ Iter Where = FindSlots(Begin, StageBegin, StageEnd);
+ // Distance is slot number
+ unsigned Final = Where - Tally.begin();
+ return Final;
+ }
};
@@ -192,27 +220,46 @@
// Forward
class NodeInfo;
-typedef std::vector<NodeInfo *> NIVector;
-typedef std::vector<NodeInfo *>::iterator NIIterator;
+typedef NodeInfo *NodeInfoPtr;
+typedef std::vector<NodeInfoPtr> NIVector;
+typedef std::vector<NodeInfoPtr>::iterator NIIterator;
//===----------------------------------------------------------------------===//
///
/// Node group - This struct is used to manage flagged node groups.
///
-class NodeGroup : public NIVector {
+class NodeGroup {
private:
+ NIVector Members; // Group member nodes
+ NodeInfo *Dominator; // Node with highest latency
+ unsigned Latency; // Total latency of the group
int Pending; // Number of visits pending before
// adding to order
public:
// Ctor.
- NodeGroup() : Pending(0) {}
+ NodeGroup() : Dominator(NULL), Pending(0) {}
// Accessors
- inline NodeInfo *getLeader() { return empty() ? NULL : front(); }
+ inline void setDominator(NodeInfo *D) { Dominator = D; }
+ inline NodeInfo *getDominator() { return Dominator; }
+ inline void setLatency(unsigned L) { Latency = L; }
+ inline unsigned getLatency() { return Latency; }
inline int getPending() const { return Pending; }
inline void setPending(int P) { Pending = P; }
inline int addPending(int I) { return Pending += I; }
+
+ // Pass thru
+ inline bool group_empty() { return Members.empty(); }
+ inline NIIterator group_begin() { return Members.begin(); }
+ inline NIIterator group_end() { return Members.end(); }
+ inline void group_push_back(const NodeInfoPtr &NI) { Members.push_back(NI); }
+ inline NIIterator group_insert(NIIterator Pos, const NodeInfoPtr &NI) {
+ return Members.insert(Pos, NI);
+ }
+ inline void group_insert(NIIterator Pos, NIIterator First, NIIterator Last) {
+ Members.insert(Pos, First, Last);
+ }
static void Add(NodeInfo *D, NodeInfo *U);
static unsigned CountInternalUses(NodeInfo *D, NodeInfo *U);
@@ -230,8 +277,9 @@
// adding to order
public:
SDNode *Node; // DAG node
- unsigned Latency; // Cycles to complete instruction
- unsigned ResourceSet; // Bit vector of usable resources
+ InstrStage *StageBegin; // First stage in itinerary
+ InstrStage *StageEnd; // Last+1 stage in itinerary
+ unsigned Latency; // Total cycles to complete instruction
bool IsCall; // Is function call
unsigned Slot; // Node's time slot
NodeGroup *Group; // Grouping information
@@ -244,8 +292,9 @@
NodeInfo(SDNode *N = NULL)
: Pending(0)
, Node(N)
+ , StageBegin(NULL)
+ , StageEnd(NULL)
, Latency(0)
- , ResourceSet(0)
, IsCall(false)
, Slot(0)
, Group(NULL)
@@ -257,11 +306,11 @@
// Accessors
inline bool isInGroup() const {
- assert(!Group || !Group->empty() && "Group with no members");
+ assert(!Group || !Group->group_empty() && "Group with no members");
return Group != NULL;
}
- inline bool isGroupLeader() const {
- return isInGroup() && Group->getLeader() == this;
+ inline bool isGroupDominator() const {
+ return isInGroup() && Group->getDominator() == this;
}
inline int getPending() const {
return Group ? Group->getPending() : Pending;
@@ -298,8 +347,8 @@
if (N->isInGroup()) {
// get Group
NodeGroup *Group = NI->Group;
- NGI = Group->begin();
- NGE = Group->end();
+ NGI = Group->group_begin();
+ NGE = Group->group_end();
// Prevent this node from being used (will be in members list
NI = NULL;
}
@@ -353,7 +402,8 @@
public:
// Ctor.
- NodeGroupOpIterator(NodeInfo *N) : NI(N), GI(N) {}
+ NodeGroupOpIterator(NodeInfo *N)
+ : NI(N), GI(N), OI(SDNode::op_iterator()), OE(SDNode::op_iterator()) {}
/// isEnd - Returns true when not more operands are available.
///
@@ -375,15 +425,6 @@
///
class SimpleSched {
private:
- // TODO - get ResourceSet from TII
- enum {
- RSInteger = 0x3, // Two integer units
- RSFloat = 0xC, // Two float units
- RSLoadStore = 0x30, // Two load store units
- RSBranch = 0x400, // One branch unit
- RSOther = 0 // Processing unit independent
- };
-
MachineBasicBlock *BB; // Current basic block
SelectionDAG &DAG; // DAG of the current basic block
const TargetMachine &TM; // Target processor
@@ -392,6 +433,7 @@
SSARegMap *RegMap; // Virtual/real register map
MachineConstantPool *ConstPool; // Target constant pool
unsigned NodeCount; // Number of nodes in DAG
+ bool HasGroups; // True if there are any groups
NodeInfo *Info; // Info for nodes being scheduled
std::map<SDNode *, NodeInfo *> Map; // Map nodes to info
NIVector Ordering; // Emit ordering of nodes
@@ -406,7 +448,7 @@
: BB(bb), DAG(D), TM(D.getTarget()), TII(*TM.getInstrInfo()),
MRI(*TM.getRegisterInfo()), RegMap(BB->getParent()->getSSARegMap()),
ConstPool(BB->getParent()->getConstantPool()),
- NodeCount(0), Info(NULL), Map(), Tally(), NSlots(0) {
+ NodeCount(0), HasGroups(false), Info(NULL), Map(), Tally(), NSlots(0) {
assert(&TII && "Target doesn't provide instr info?");
assert(&MRI && "Target doesn't provide register info?");
}
@@ -439,6 +481,7 @@
void Schedule();
void IdentifyGroups();
void GatherSchedulingInfo();
+ void FakeGroupDominators();
void PrepareNodeInfo();
bool isStrongDependency(NodeInfo *A, NodeInfo *B);
bool isWeakDependency(NodeInfo *A, NodeInfo *B);
@@ -458,6 +501,27 @@
inline void dump(const char *tag) const { std::cerr << tag; dump(); }
void dump() const;
};
+
+
+//===----------------------------------------------------------------------===//
+/// Special case itineraries.
+///
+enum {
+ CallLatency = 40, // To push calls back in time
+
+ RSInteger = 0xC0000000, // Two integer units
+ RSFloat = 0x30000000, // Two float units
+ RSLoadStore = 0x0C000000, // Two load store units
+ RSBranch = 0x02000000 // One branch unit
+};
+static InstrStage CallStage = { CallLatency, RSBranch };
+static InstrStage LoadStage = { 5, RSLoadStore };
+static InstrStage StoreStage = { 2, RSLoadStore };
+static InstrStage IntStage = { 2, RSInteger };
+static InstrStage FloatStage = { 3, RSFloat };
+//===----------------------------------------------------------------------===//
+
+
//===----------------------------------------------------------------------===//
} // namespace
@@ -491,7 +555,8 @@
}
}
// Merge the two lists
- DGroup->insert(DGroup->end(), UGroup->begin(), UGroup->end());
+ DGroup->group_insert(DGroup->group_end(),
+ UGroup->group_begin(), UGroup->group_end());
} else if (DGroup) {
// Make user member of definers group
U->Group = DGroup;
@@ -503,7 +568,7 @@
// Remove internal edges
DGroup->addPending(-CountInternalUses(DNI, U));
}
- DGroup->push_back(U);
+ DGroup->group_push_back(U);
} else if (UGroup) {
// Make definer member of users group
D->Group = UGroup;
@@ -515,13 +580,13 @@
// Remove internal edges
UGroup->addPending(-CountInternalUses(D, UNI));
}
- UGroup->insert(UGroup->begin(), D);
+ UGroup->group_insert(UGroup->group_begin(), D);
} else {
D->Group = U->Group = DGroup = new NodeGroup();
DGroup->addPending(D->Node->use_size() + U->Node->use_size() -
CountInternalUses(D, U));
- DGroup->push_back(D);
- DGroup->push_back(U);
+ DGroup->group_push_back(D);
+ DGroup->group_push_back(U);
}
}
@@ -529,10 +594,11 @@
///
unsigned NodeGroup::CountInternalUses(NodeInfo *D, NodeInfo *U) {
unsigned N = 0;
- for (SDNode:: use_iterator UI = D->Node->use_begin(),
- E = D->Node->use_end(); UI != E; UI++) {
- if (*UI == U->Node) N++;
+ for (unsigned M = U->Node->getNumOperands(); 0 < M--;) {
+ SDOperand Op = U->Node->getOperand(M);
+ if (Op.Val == D->Node) N++;
}
+
return N;
}
//===----------------------------------------------------------------------===//
@@ -587,9 +653,9 @@
/// IncludeNode - Add node to NodeInfo vector.
///
void SimpleSched::IncludeNode(NodeInfo *NI) {
- // Get node
- SDNode *Node = NI->Node;
- // Ignore entry node
+// Get node
+SDNode *Node = NI->Node;
+// Ignore entry node
if (Node->getOpcode() == ISD::EntryToken) return;
// Check current count for node
int Count = NI->getPending();
@@ -601,7 +667,7 @@
if (!Count) {
// Add node
if (NI->isInGroup()) {
- Ordering.push_back(NI->Group->getLeader());
+ Ordering.push_back(NI->Group->getDominator());
} else {
Ordering.push_back(NI);
}
@@ -662,6 +728,8 @@
if (Op.getValueType() != MVT::Flag) break;
// Add to node group
NodeGroup::Add(getNI(Op.Val), NI);
+ // Let everyone else know
+ HasGroups = true;
}
}
}
@@ -669,8 +737,8 @@
/// GatherSchedulingInfo - Get latency and resource information about each node.
///
void SimpleSched::GatherSchedulingInfo() {
- // Track if groups are present
- bool AreGroups = false;
+ // Get instruction itineraries for the target
+ const InstrItineraryData InstrItins = TM.getInstrItineraryData();
// For each node
for (unsigned i = 0, N = NodeCount; i < N; i++) {
@@ -678,90 +746,87 @@
NodeInfo* NI = &Info[i];
SDNode *Node = NI->Node;
- // Test for groups
- if (NI->isInGroup()) AreGroups = true;
-
- // FIXME: Pretend by using value type to choose metrics
- MVT::ValueType VT = Node->getValueType(0);
-
- // If machine opcode
- if (Node->isTargetOpcode()) {
- MachineOpCode TOpc = Node->getTargetOpcode();
- // FIXME: This is an ugly (but temporary!) hack to test the scheduler
- // before we have real target info.
- // FIXME NI->Latency = std::max(1, TII.maxLatency(TOpc));
- // FIXME NI->ResourceSet = TII.resources(TOpc);
- if (TII.isCall(TOpc)) {
- NI->ResourceSet = RSBranch;
- NI->Latency = 40;
- NI->IsCall = true;
- } else if (TII.isLoad(TOpc)) {
- NI->ResourceSet = RSLoadStore;
- NI->Latency = 5;
- } else if (TII.isStore(TOpc)) {
- NI->ResourceSet = RSLoadStore;
- NI->Latency = 2;
- } else if (MVT::isInteger(VT)) {
- NI->ResourceSet = RSInteger;
- NI->Latency = 2;
- } else if (MVT::isFloatingPoint(VT)) {
- NI->ResourceSet = RSFloat;
- NI->Latency = 3;
- } else {
- NI->ResourceSet = RSOther;
- NI->Latency = 0;
- }
- } else {
- if (MVT::isInteger(VT)) {
- NI->ResourceSet = RSInteger;
- NI->Latency = 2;
- } else if (MVT::isFloatingPoint(VT)) {
- NI->ResourceSet = RSFloat;
- NI->Latency = 3;
- } else {
- NI->ResourceSet = RSOther;
- NI->Latency = 0;
+ // If there are no itineraries, or itinerary-based scheduling is disabled
+ if (InstrItins.isEmpty() || ScheduleStyle == simpleNoItinScheduling) {
+ // If machine opcode
+ if (Node->isTargetOpcode()) {
+ // Get return type to guess which processing unit
+ MVT::ValueType VT = Node->getValueType(0);
+ // Get machine opcode
+ MachineOpCode TOpc = Node->getTargetOpcode();
+ NI->IsCall = TII.isCall(TOpc);
+
+ if (TII.isLoad(TOpc)) NI->StageBegin = &LoadStage;
+ else if (TII.isStore(TOpc)) NI->StageBegin = &StoreStage;
+ else if (MVT::isInteger(VT)) NI->StageBegin = &IntStage;
+ else if (MVT::isFloatingPoint(VT)) NI->StageBegin = &FloatStage;
+ if (NI->StageBegin) NI->StageEnd = NI->StageBegin + 1;
}
+ } else if (Node->isTargetOpcode()) {
+ // get machine opcode
+ MachineOpCode TOpc = Node->getTargetOpcode();
+ // Check to see if it is a call
+ NI->IsCall = TII.isCall(TOpc);
+ // Get itinerary stages for instruction
+ unsigned II = TII.getSchedClass(TOpc);
+ NI->StageBegin = InstrItins.begin(II);
+ NI->StageEnd = InstrItins.end(II);
+ }
+
+ // One slot for the instruction itself
+ NI->Latency = 1;
+
+ // Add long latency for a call to push it back in time
+ if (NI->IsCall) NI->Latency += CallLatency;
+
+ // Sum up all the latencies
+ for (InstrStage *Stage = NI->StageBegin, *E = NI->StageEnd;
+ Stage != E; Stage++) {
+ NI->Latency += Stage->Cycles;
}
- // Add one slot for the instruction itself
- NI->Latency++;
-
// Sum up all the latencies for max tally size
NSlots += NI->Latency;
}
// Unify metrics if in a group
- if (AreGroups) {
+ if (HasGroups) {
for (unsigned i = 0, N = NodeCount; i < N; i++) {
NodeInfo* NI = &Info[i];
- if (NI->isGroupLeader()) {
+ if (NI->isInGroup()) {
NodeGroup *Group = NI->Group;
- unsigned Latency = 0;
- unsigned MaxLat = 0;
- unsigned ResourceSet = 0;
- bool IsCall = false;
- for (NIIterator NGI = Group->begin(), NGE = Group->end();
- NGI != NGE; NGI++) {
- NodeInfo* NGNI = *NGI;
- Latency += NGNI->Latency;
- IsCall = IsCall || NGNI->IsCall;
+ if (!Group->getDominator()) {
+ NIIterator NGI = Group->group_begin(), NGE = Group->group_end();
+ NodeInfo *Dominator = *NGI;
+ unsigned Latency = 0;
- if (MaxLat < NGNI->Latency) {
- MaxLat = NGNI->Latency;
- ResourceSet = NGNI->ResourceSet;
+ for (NGI++; NGI != NGE; NGI++) {
+ NodeInfo* NGNI = *NGI;
+ Latency += NGNI->Latency;
+ if (Dominator->Latency < NGNI->Latency) Dominator = NGNI;
}
- NGNI->Latency = 0;
- NGNI->ResourceSet = 0;
- NGNI->IsCall = false;
+ Dominator->Latency = Latency;
+ Group->setDominator(Dominator);
}
-
- NI->Latency = Latency;
- NI->ResourceSet = ResourceSet;
- NI->IsCall = IsCall;
+ }
+ }
+ }
+}
+
+/// FakeGroupDominators - Set dominators for non-scheduling.
+///
+void SimpleSched::FakeGroupDominators() {
+ for (unsigned i = 0, N = NodeCount; i < N; i++) {
+ NodeInfo* NI = &Info[i];
+
+ if (NI->isInGroup()) {
+ NodeGroup *Group = NI->Group;
+
+ if (!Group->getDominator()) {
+ Group->setDominator(NI);
}
}
}
@@ -772,21 +837,18 @@
void SimpleSched::PrepareNodeInfo() {
// Allocate node information
Info = new NodeInfo[NodeCount];
- // Get base of all nodes table
- SelectionDAG::allnodes_iterator AllNodes = DAG.allnodes_begin();
-
- // For each node being scheduled
- for (unsigned i = 0, N = NodeCount; i < N; i++) {
- // Get next node from DAG all nodes table
- SDNode *Node = AllNodes[i];
+
+ unsigned i = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I, ++i) {
// Fast reference to node schedule info
NodeInfo* NI = &Info[i];
// Set up map
- Map[Node] = NI;
+ Map[I] = NI;
// Set node
- NI->Node = Node;
+ NI->Node = I;
// Set pending visit count
- NI->setPending(Node->use_size());
+ NI->setPending(I->use_size());
}
}
@@ -798,7 +860,8 @@
}
/// isWeakDependency Return true if node A produces a result that will
-/// conflict with operands of B.
+/// conflict with operands of B. It is assumed that we have called
+/// isStrongDependency prior.
bool SimpleSched::isWeakDependency(NodeInfo *A, NodeInfo *B) {
// TODO check for conflicting real registers and aliases
#if 0 // FIXME - Since we are in SSA form and not checking register aliasing
@@ -843,9 +906,11 @@
// If independent of others (or first entry)
if (Slot == NotFound) Slot = 0;
+#if 0 // FIXME - measure later
// Find a slot where the needed resources are available
- if (NI->ResourceSet)
- Slot = Tally.FindAndReserve(Slot, NI->Latency, NI->ResourceSet);
+ if (NI->StageBegin != NI->StageEnd)
+ Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+#endif
// Set node slot
NI->Slot = Slot;
@@ -899,8 +964,8 @@
if (Slot == NotFound) Slot = 0;
// Find a slot where the needed resources are available
- if (NI->ResourceSet)
- Slot = Tally.FindAndReserve(Slot, NI->Latency, NI->ResourceSet);
+ if (NI->StageBegin != NI->StageEnd)
+ Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
// Set node slot
NI->Slot = Slot;
@@ -930,7 +995,7 @@
// Iterate through nodes
NodeGroupIterator NGI(Ordering[i]);
if (NI->isInGroup()) {
- if (NI->isGroupLeader()) {
+ if (NI->isGroupDominator()) {
NodeGroupIterator NGI(Ordering[i]);
while (NodeInfo *NI = NGI.next()) EmitNode(NI);
}
@@ -1006,7 +1071,28 @@
// Add result register values for things that are defined by this
// instruction.
- if (NumResults) VRBase = CreateVirtualRegisters(MI, NumResults, II);
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ if (NumResults == 1) {
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *Use = *UI;
+ if (Use->getOpcode() == ISD::CopyToReg &&
+ Use->getOperand(2).Val == Node) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (MRegisterInfo::isVirtualRegister(Reg)) {
+ VRBase = Reg;
+ MI->addRegOperand(Reg, MachineOperand::Def);
+ break;
+ }
+ }
+ }
+ }
+
+ // Otherwise, create new virtual registers.
+ if (NumResults && VRBase == 0)
+ VRBase = CreateVirtualRegisters(MI, NumResults, II);
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
@@ -1084,10 +1170,11 @@
case ISD::TokenFactor:
break;
case ISD::CopyToReg: {
- unsigned Val = getVR(Node->getOperand(2));
- MRI.copyRegToReg(*BB, BB->end(),
- cast<RegisterSDNode>(Node->getOperand(1))->getReg(), Val,
- RegMap->getRegClass(Val));
+ unsigned InReg = getVR(Node->getOperand(2));
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (InReg != DestReg) // Coalesced away the copy?
+ MRI.copyRegToReg(*BB, BB->end(), DestReg, InReg,
+ RegMap->getRegClass(InReg));
break;
}
case ISD::CopyFromReg: {
@@ -1097,21 +1184,40 @@
break;
}
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *Use = *UI;
+ if (Use->getOpcode() == ISD::CopyToReg &&
+ Use->getOperand(2).Val == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (MRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
// Figure out the register class to create for the destreg.
const TargetRegisterClass *TRC = 0;
+ if (VRBase) {
+ TRC = RegMap->getRegClass(VRBase);
+ } else {
- // Pick the register class of the right type that contains this physreg.
- for (MRegisterInfo::regclass_iterator I = MRI.regclass_begin(),
- E = MRI.regclass_end(); I != E; ++I)
- if ((*I)->getType() == Node->getValueType(0) &&
- (*I)->contains(SrcReg)) {
- TRC = *I;
- break;
- }
- assert(TRC && "Couldn't find register class for reg copy!");
+ // Pick the register class of the right type that contains this physreg.
+ for (MRegisterInfo::regclass_iterator I = MRI.regclass_begin(),
+ E = MRI.regclass_end(); I != E; ++I)
+ if ((*I)->getType() == Node->getValueType(0) &&
+ (*I)->contains(SrcReg)) {
+ TRC = *I;
+ break;
+ }
+ assert(TRC && "Couldn't find register class for reg copy!");
- // Create the reg, emit the copy.
- VRBase = RegMap->createVirtualRegister(TRC);
+ // Create the reg, emit the copy.
+ VRBase = RegMap->createVirtualRegister(TRC);
+ }
MRI.copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
break;
}
@@ -1126,11 +1232,23 @@
///
void SimpleSched::Schedule() {
// Number the nodes
- NodeCount = DAG.allnodes_size();
- // Set up minimum info for scheduling.
+ NodeCount = std::distance(DAG.allnodes_begin(), DAG.allnodes_end());
+ // Test to see if scheduling should occur
+ bool ShouldSchedule = NodeCount > 3 && ScheduleStyle != noScheduling;
+ // Set up minimum info for scheduling
PrepareNodeInfo();
// Construct node groups for flagged nodes
IdentifyGroups();
+
+ // Don't waste time if there is only an entry and a return
+ if (ShouldSchedule) {
+ // Get latency and resource requirements
+ GatherSchedulingInfo();
+ } else if (HasGroups) {
+ // Make sure all the groups have dominators
+ FakeGroupDominators();
+ }
+
// Breadth first walk of DAG
VisitAll();
@@ -1144,10 +1262,7 @@
#endif
// Don't waste time if is only entry and return
- if (NodeCount > 3 && ScheduleStyle != noScheduling) {
- // Get latency and resource requirements
- GatherSchedulingInfo();
-
+ if (ShouldSchedule) {
// Push back long instructions and critical path
ScheduleBackward();
@@ -1182,9 +1297,9 @@
std::cerr << " " << NI->Preorder << ". ";
printSI(std::cerr, NI);
std::cerr << "\n";
- if (NI->isGroupLeader()) {
+ if (NI->isGroupDominator()) {
NodeGroup *Group = NI->Group;
- for (NIIterator NII = Group->begin(), E = Group->end();
+ for (NIIterator NII = Group->group_begin(), E = Group->group_end();
NII != E; NII++) {
std::cerr << " ";
printSI(std::cerr, *NII);
@@ -1205,7 +1320,6 @@
SDNode *Node = NI->Node;
O << " "
<< std::hex << Node << std::dec
- << ", RS=" << NI->ResourceSet
<< ", Lat=" << NI->Latency
<< ", Slot=" << NI->Slot
<< ", ARITY=(" << Node->getNumOperands() << ","
@@ -1226,9 +1340,9 @@
NodeInfo *NI = Ordering[i];
printSI(O, NI);
O << "\n";
- if (NI->isGroupLeader()) {
+ if (NI->isGroupDominator()) {
NodeGroup *Group = NI->Group;
- for (NIIterator NII = Group->begin(), E = Group->end();
+ for (NIIterator NII = Group->group_begin(), E = Group->group_end();
NII != E; NII++) {
O << " ";
printSI(O, *NII);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.206 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.206.2.1
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.206 Wed Oct 12 22:11:28 2005
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Nov 16 12:32:15 2005
@@ -167,63 +167,63 @@
/// chain but no other uses and no side effect. If a node is passed in as an
/// argument, it is used as the seed for node deletion.
void SelectionDAG::RemoveDeadNodes(SDNode *N) {
- std::set<SDNode*> AllNodeSet(AllNodes.begin(), AllNodes.end());
-
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted.
HandleSDNode Dummy(getRoot());
+ bool MadeChange = false;
+
// If we have a hint to start from, use it.
- if (N) DeleteNodeIfDead(N, &AllNodeSet);
-
- Restart:
- unsigned NumNodes = AllNodeSet.size();
- for (std::set<SDNode*>::iterator I = AllNodeSet.begin(), E = AllNodeSet.end();
- I != E; ++I) {
- // Try to delete this node.
- DeleteNodeIfDead(*I, &AllNodeSet);
-
- // If we actually deleted any nodes, do not use invalid iterators in
- // AllNodeSet.
- if (AllNodeSet.size() != NumNodes)
- goto Restart;
- }
-
- // Restore AllNodes.
- if (AllNodes.size() != NumNodes)
- AllNodes.assign(AllNodeSet.begin(), AllNodeSet.end());
+ if (N && N->use_empty()) {
+ DestroyDeadNode(N);
+ MadeChange = true;
+ }
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty() && I->getOpcode() != 65535) {
+ // Node is dead, recursively delete newly dead uses.
+ DestroyDeadNode(I);
+ MadeChange = true;
+ }
+
+ // Walk the nodes list, removing the nodes we've marked as dead.
+ if (MadeChange) {
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ if (N->use_empty())
+ AllNodes.erase(N);
+ }
+ }
+
// If the root changed (e.g. it was a dead load, update the root).
setRoot(Dummy.getValue());
}
-
-void SelectionDAG::DeleteNodeIfDead(SDNode *N, void *NodeSet) {
- if (!N->use_empty())
- return;
-
+/// DestroyDeadNode - We know that N is dead. Nuke it from the CSE maps for the
+/// graph. If it is the last user of any of its operands, recursively process
+/// them the same way.
+///
+void SelectionDAG::DestroyDeadNode(SDNode *N) {
// Okay, we really are going to delete this node. First take this out of the
// appropriate CSE map.
RemoveNodeFromCSEMaps(N);
// Next, brutally remove the operand list. This is safe to do, as there are
// no cycles in the graph.
- while (!N->Operands.empty()) {
- SDNode *O = N->Operands.back().Val;
- N->Operands.pop_back();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ SDNode *O = I->Val;
O->removeUser(N);
// Now that we removed this operand, see if there are no uses of it left.
- DeleteNodeIfDead(O, NodeSet);
+ if (O->use_empty())
+ DestroyDeadNode(O);
}
-
- // Remove the node from the nodes set and delete it.
- std::set<SDNode*> &AllNodeSet = *(std::set<SDNode*>*)NodeSet;
- AllNodeSet.erase(N);
-
- // Now that the node is gone, check to see if any of the operands of this node
- // are dead now.
- delete N;
+ delete[] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
+
+ // Mark the node as dead.
+ N->MorphNodeTo(65535);
}
void SelectionDAG::DeleteNode(SDNode *N) {
@@ -240,22 +240,14 @@
void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
// Remove it from the AllNodes list.
- for (std::vector<SDNode*>::iterator I = AllNodes.begin(); ; ++I) {
- assert(I != AllNodes.end() && "Node not in AllNodes list??");
- if (*I == N) {
- // Erase from the vector, which is not ordered.
- std::swap(*I, AllNodes.back());
- AllNodes.pop_back();
- break;
- }
- }
+ AllNodes.remove(N);
// Drop all of the operands and decrement used nodes use counts.
- while (!N->Operands.empty()) {
- SDNode *O = N->Operands.back().Val;
- N->Operands.pop_back();
- O->removeUser(N);
- }
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ I->Val->removeUser(N);
+ delete[] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
delete N;
}
@@ -312,6 +304,9 @@
case ISD::ExternalSymbol:
Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
break;
+ case ISD::TargetExternalSymbol:
+ Erased = TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
case ISD::VALUETYPE:
Erased = ValueTypeNodes[cast<VTSDNode>(N)->getVT()] != 0;
ValueTypeNodes[cast<VTSDNode>(N)->getVT()] = 0;
@@ -419,14 +414,18 @@
AN = N;
}
return 0;
-
}
SelectionDAG::~SelectionDAG() {
- for (unsigned i = 0, e = AllNodes.size(); i != e; ++i)
- delete AllNodes[i];
+ while (!AllNodes.empty()) {
+ SDNode *N = AllNodes.begin();
+ delete [] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
+ AllNodes.pop_front();
+ }
}
SDOperand SelectionDAG::getZeroExtendInReg(SDOperand Op, MVT::ValueType VT) {
@@ -551,7 +550,15 @@
SDOperand SelectionDAG::getExternalSymbol(const char *Sym, MVT::ValueType VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDOperand(N, 0);
- N = new ExternalSymbolSDNode(Sym, VT);
+ N = new ExternalSymbolSDNode(false, Sym, VT);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT::ValueType VT) {
+ SDNode *&N = TargetExternalSymbols[Sym];
+ if (N) return SDOperand(N, 0);
+ N = new ExternalSymbolSDNode(true, Sym, VT);
AllNodes.push_back(N);
return SDOperand(N, 0);
}
@@ -1065,9 +1072,9 @@
assert((getOpcode() == ISD::CALLSEQ_START ||
getOpcode() == ISD::CALLSEQ_END) && "Cannot adjust this node!");
- Operands[0].Val->removeUser(this);
- Operands[0] = N;
- N.Val->Uses.push_back(this);
+ OperandList[0].Val->removeUser(this);
+ OperandList[0] = N;
+ OperandList[0].Val->Uses.push_back(this);
}
@@ -1080,7 +1087,7 @@
N = new SDNode(ISD::LOAD, Chain, Ptr, SV);
// Loads have a token chain.
- N->setValueTypes(VT, MVT::Other);
+ setNodeValueTypes(N, VT, MVT::Other);
AllNodes.push_back(N);
return SDOperand(N, 0);
}
@@ -1198,7 +1205,7 @@
case 3: return getNode(Opcode, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
-
+
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Ops[1].Val);
switch (Opcode) {
default: break;
@@ -1324,11 +1331,46 @@
} else {
N = new SDNode(Opcode, Ops);
}
- N->setValueTypes(ResultTys);
+ setNodeValueTypes(N, ResultTys);
AllNodes.push_back(N);
return SDOperand(N, 0);
}
+void SelectionDAG::setNodeValueTypes(SDNode *N,
+ std::vector<MVT::ValueType> &RetVals) {
+ switch (RetVals.size()) {
+ case 0: return;
+ case 1: N->setValueTypes(RetVals[0]); return;
+ case 2: setNodeValueTypes(N, RetVals[0], RetVals[1]); return;
+ default: break;
+ }
+
+ std::list<std::vector<MVT::ValueType> >::iterator I =
+ std::find(VTList.begin(), VTList.end(), RetVals);
+ if (I == VTList.end()) {
+ VTList.push_front(RetVals);
+ I = VTList.begin();
+ }
+
+ N->setValueTypes(&(*I)[0], I->size());
+}
+
+void SelectionDAG::setNodeValueTypes(SDNode *N, MVT::ValueType VT1,
+ MVT::ValueType VT2) {
+ for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+ E = VTList.end(); I != E; ++I) {
+ if (I->size() == 2 && (*I)[0] == VT1 && (*I)[1] == VT2) {
+ N->setValueTypes(&(*I)[0], 2);
+ return;
+ }
+ }
+ std::vector<MVT::ValueType> V;
+ V.push_back(VT1);
+ V.push_back(VT2);
+ VTList.push_front(V);
+ N->setValueTypes(&(*VTList.begin())[0], 2);
+}
+
/// SelectNodeTo - These are used for target selectors to *mutate* the
/// specified node to have the specified return type, Target opcode, and
@@ -1360,7 +1402,7 @@
SDOperand Op1, SDOperand Op2) {
RemoveNodeFromCSEMaps(N);
N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc);
- N->setValueTypes(VT1, VT2);
+ setNodeValueTypes(N, VT1, VT2);
N->setOperands(Op1, Op2);
}
void SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
@@ -1376,7 +1418,7 @@
SDOperand Op1, SDOperand Op2, SDOperand Op3) {
RemoveNodeFromCSEMaps(N);
N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc);
- N->setValueTypes(VT1, VT2);
+ setNodeValueTypes(N, VT1, VT2);
N->setOperands(Op1, Op2, Op3);
}
@@ -1417,10 +1459,11 @@
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(U);
- for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
- if (U->getOperand(i).Val == From) {
+ for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+ I != E; ++I)
+ if (I->Val == From) {
From->removeUser(U);
- U->Operands[i].Val = To;
+ I->Val = To;
To->addUser(U);
}
@@ -1458,10 +1501,11 @@
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(U);
- for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
- if (U->getOperand(i).Val == From) {
+ for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+ I != E; ++I)
+ if (I->Val == From) {
From->removeUser(U);
- U->Operands[i].Val = To;
+ I->Val = To;
To->addUser(U);
}
@@ -1499,11 +1543,12 @@
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(U);
- for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
- if (U->getOperand(i).Val == From) {
- const SDOperand &ToOp = To[U->getOperand(i).ResNo];
+ for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+ I != E; ++I)
+ if (I->Val == From) {
+ const SDOperand &ToOp = To[I->ResNo];
From->removeUser(U);
- U->Operands[i] = ToOp;
+ *I = ToOp;
ToOp.Val->addUser(U);
}
@@ -1523,6 +1568,15 @@
// SDNode Class
//===----------------------------------------------------------------------===//
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+MVT::ValueType *SDNode::getValueTypeList(MVT::ValueType VT) {
+ static MVT::ValueType VTs[MVT::LAST_VALUETYPE];
+ VTs[VT] = VT;
+ return &VTs[VT];
+}
+
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
/// indicated value. This method ignores uses of other values defined by this
/// operation.
@@ -1570,6 +1624,7 @@
}
case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
case ISD::VALUETYPE: return "ValueType";
case ISD::EntryToken: return "EntryToken";
@@ -1586,6 +1641,7 @@
case ISD::BasicBlock: return "BasicBlock";
case ISD::Register: return "Register";
case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
case ISD::ConstantPool: return "ConstantPool";
case ISD::TargetConstantPool: return "TargetConstantPool";
case ISD::CopyToReg: return "CopyToReg";
@@ -1771,7 +1827,7 @@
}
}
-static void DumpNodes(SDNode *N, unsigned indent, const SelectionDAG *G) {
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
if (N->getOperand(i).Val->hasOneUse())
DumpNodes(N->getOperand(i).Val, indent+2, G);
@@ -1786,7 +1842,11 @@
void SelectionDAG::dump() const {
std::cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
- std::vector<SDNode*> Nodes(AllNodes);
+ std::vector<const SDNode*> Nodes;
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I)
+ Nodes.push_back(I);
+
std::sort(Nodes.begin(), Nodes.end());
for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.88.2.1 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.88.2.2
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.88.2.1 Tue Oct 18 14:21:57 2005
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Wed Nov 16 12:32:15 2005
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include <map>
#include <iostream>
@@ -45,7 +46,6 @@
static const bool ViewDAGs = 0;
#endif
-
namespace llvm {
//===--------------------------------------------------------------------===//
/// FunctionLoweringInfo - This contains information that is global to a
@@ -72,14 +72,6 @@
/// anywhere in the function.
std::map<const AllocaInst*, int> StaticAllocaMap;
- /// BlockLocalArguments - If any arguments are only used in a single basic
- /// block, and if the target can access the arguments without side-effects,
- /// avoid emitting CopyToReg nodes for those arguments. This map keeps
- /// track of which arguments are local to each BB.
- std::multimap<BasicBlock*, std::pair<Argument*,
- unsigned> > BlockLocalArguments;
-
-
unsigned MakeReg(MVT::ValueType VT) {
return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
}
@@ -125,24 +117,39 @@
return false;
}
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true.
+static bool isOnlyUsedInEntryBlock(Argument *A) {
+ BasicBlock *Entry = A->getParent()->begin();
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != Entry)
+ return false; // Use not in entry block.
+ return true;
+}
+
FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
Function &fn, MachineFunction &mf)
: TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {
- // Initialize the mapping of values to registers. This is only set up for
- // instruction values that are used outside of the block that defines
- // them.
+ // Create a vreg for each argument register that is not dead and is used
+ // outside of the entry block for the function.
for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
AI != E; ++AI)
- InitializeRegForValue(AI);
+ if (!isOnlyUsedInEntryBlock(AI))
+ InitializeRegForValue(AI);
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
Function::iterator BB = Fn.begin(), EB = Fn.end();
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(AI->getArraySize())) {
const Type *Ty = AI->getAllocatedType();
uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
- unsigned Align = TLI.getTargetData().getTypeAlignment(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
+ AI->getAlignment());
// If the alignment of the value is smaller than the size of the value,
// and if the size of the value is particularly small (<= 8 bytes),
@@ -151,9 +158,8 @@
// FIXME: This could be made better with a preferred alignment hook in
// TargetData. It serves primarily to 8-byte align doubles for X86.
if (Align < TySize && TySize <= 8) Align = TySize;
-
- if (CUI->getValue()) // Don't produce zero sized stack objects
- TySize *= CUI->getValue(); // Get total allocated size.
+ TySize *= CUI->getValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
MF.getFrameInfo()->CreateStackObject((unsigned)TySize, Align);
}
@@ -399,6 +405,7 @@
void visitStore(StoreInst &I);
void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
void visitCall(CallInst &I);
+ const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
void visitVAStart(CallInst &I);
void visitVAArg(VAArgInst &I);
@@ -464,8 +471,8 @@
case MVT::f64:
break; // No extension needed!
}
-
- DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot(), Op1));
+ // Allow targets to lower this further to meet ABI requirements
+ DAG.setRoot(TLI.LowerReturnTo(getRoot(), Op1, DAG));
}
void SelectionDAGLowering::visitBr(BranchInst &I) {
@@ -614,24 +621,47 @@
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
- if (!isa<Constant>(Idx) || !cast<Constant>(Idx)->isNullValue()) {
- // N = N + Idx * ElementSize;
- uint64_t ElementSize = TD.getTypeSize(Ty);
- SDOperand IdxN = getValue(Idx), Scale = getIntPtrConstant(ElementSize);
-
- // If the index is smaller or larger than intptr_t, truncate or extend
- // it.
- if (IdxN.getValueType() < Scale.getValueType()) {
- if (Idx->getType()->isSigned())
- IdxN = DAG.getNode(ISD::SIGN_EXTEND, Scale.getValueType(), IdxN);
- else
- IdxN = DAG.getNode(ISD::ZERO_EXTEND, Scale.getValueType(), IdxN);
- } else if (IdxN.getValueType() > Scale.getValueType())
- IdxN = DAG.getNode(ISD::TRUNCATE, Scale.getValueType(), IdxN);
- IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getRawValue() == 0) continue;
+
+ uint64_t Offs;
+ if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
+ Offs = (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
+ else
+ Offs = TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
+ N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeSize(Ty);
+ SDOperand IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ if (IdxN.getValueType() < N.getValueType()) {
+ if (Idx->getType()->isSigned())
+ IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
+ else
+ IdxN = DAG.getNode(ISD::ZERO_EXTEND, N.getValueType(), IdxN);
+ } else if (IdxN.getValueType() > N.getValueType())
+ IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (isPowerOf2_64(ElementSize)) {
+ unsigned Amt = Log2_64(ElementSize);
+ IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getShiftAmountTy()));
N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+ continue;
}
+
+ SDOperand Scale = getIntPtrConstant(ElementSize);
+ IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+ N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
}
}
setValue(&I, N);
@@ -645,7 +675,8 @@
const Type *Ty = I.getAllocatedType();
uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
- unsigned Align = TLI.getTargetData().getTypeAlignment(Ty);
+ unsigned Align = std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
+ I.getAlignment());
SDOperand AllocSize = getValue(I.getArraySize());
MVT::ValueType IntPtr = TLI.getPointerTy();
@@ -719,123 +750,144 @@
DAG.getSrcValue(I.getOperand(1))));
}
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the name;
+/// otherwise lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return 0;
+ case Intrinsic::frameaddress: visitFrameReturnAddress(I, true); return 0;
+ case Intrinsic::setjmp:
+ return "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
+ break;
+ case Intrinsic::longjmp:
+ return "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
+ break;
+ case Intrinsic::memcpy: visitMemIntrinsic(I, ISD::MEMCPY); return 0;
+ case Intrinsic::memset: visitMemIntrinsic(I, ISD::MEMSET); return 0;
+ case Intrinsic::memmove: visitMemIntrinsic(I, ISD::MEMMOVE); return 0;
+
+ case Intrinsic::readport:
+ case Intrinsic::readio: {
+ std::vector<MVT::ValueType> VTs;
+ VTs.push_back(TLI.getValueType(I.getType()));
+ VTs.push_back(MVT::Other);
+ std::vector<SDOperand> Ops;
+ Ops.push_back(getRoot());
+ Ops.push_back(getValue(I.getOperand(1)));
+ SDOperand Tmp = DAG.getNode(Intrinsic == Intrinsic::readport ?
+ ISD::READPORT : ISD::READIO, VTs, Ops);
+
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::writeport:
+ case Intrinsic::writeio:
+ DAG.setRoot(DAG.getNode(Intrinsic == Intrinsic::writeport ?
+ ISD::WRITEPORT : ISD::WRITEIO, MVT::Other,
+ getRoot(), getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ return 0;
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ case Intrinsic::dbg_declare:
+ if (I.getType() != Type::VoidTy)
+ setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
+ return 0;
+
+ case Intrinsic::isunordered:
+ setValue(&I, DAG.getSetCC(MVT::i1,getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)), ISD::SETUO));
+ return 0;
+
+ case Intrinsic::sqrt:
+ setValue(&I, DAG.getNode(ISD::FSQRT,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDOperand Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ std::vector<MVT::ValueType> VTs;
+ VTs.push_back(MVT::i64);
+ VTs.push_back(MVT::Other);
+ std::vector<SDOperand> Ops;
+ Ops.push_back(getRoot());
+ SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER, VTs, Ops);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::cttz:
+ setValue(&I, DAG.getNode(ISD::CTTZ,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::ctlz:
+ setValue(&I, DAG.getNode(ISD::CTLZ,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::ctpop:
+ setValue(&I, DAG.getNode(ISD::CTPOP,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ default:
+ std::cerr << I;
+ assert(0 && "This intrinsic is not implemented yet!");
+ return 0;
+ }
+}
+
+
void SelectionDAGLowering::visitCall(CallInst &I) {
const char *RenameFn = 0;
- SDOperand Tmp;
- if (Function *F = I.getCalledFunction())
+ if (Function *F = I.getCalledFunction()) {
if (F->isExternal())
- switch (F->getIntrinsicID()) {
- case 0: // Not an LLVM intrinsic.
- if (F->getName() == "fabs" || F->getName() == "fabsf") {
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ } else { // Not an LLVM intrinsic.
+ const std::string &Name = F->getName();
+ if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
if (I.getNumOperands() == 2 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
I.getType() == I.getOperand(1)->getType()) {
- Tmp = getValue(I.getOperand(1));
+ SDOperand Tmp = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
return;
}
- }
- else if (F->getName() == "sin" || F->getName() == "sinf") {
+ } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
if (I.getNumOperands() == 2 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
I.getType() == I.getOperand(1)->getType()) {
- Tmp = getValue(I.getOperand(1));
+ SDOperand Tmp = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
return;
}
- }
- else if (F->getName() == "cos" || F->getName() == "cosf") {
+ } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
if (I.getNumOperands() == 2 && // Basic sanity checks.
I.getOperand(1)->getType()->isFloatingPoint() &&
I.getType() == I.getOperand(1)->getType()) {
- Tmp = getValue(I.getOperand(1));
+ SDOperand Tmp = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
return;
}
}
- break;
- case Intrinsic::vastart: visitVAStart(I); return;
- case Intrinsic::vaend: visitVAEnd(I); return;
- case Intrinsic::vacopy: visitVACopy(I); return;
- case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return;
- case Intrinsic::frameaddress: visitFrameReturnAddress(I, true); return;
-
- case Intrinsic::setjmp:
- RenameFn = "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
- break;
- case Intrinsic::longjmp:
- RenameFn = "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
- break;
- case Intrinsic::memcpy: visitMemIntrinsic(I, ISD::MEMCPY); return;
- case Intrinsic::memset: visitMemIntrinsic(I, ISD::MEMSET); return;
- case Intrinsic::memmove: visitMemIntrinsic(I, ISD::MEMMOVE); return;
-
- case Intrinsic::readport:
- case Intrinsic::readio: {
- std::vector<MVT::ValueType> VTs;
- VTs.push_back(TLI.getValueType(I.getType()));
- VTs.push_back(MVT::Other);
- std::vector<SDOperand> Ops;
- Ops.push_back(getRoot());
- Ops.push_back(getValue(I.getOperand(1)));
- Tmp = DAG.getNode(F->getIntrinsicID() == Intrinsic::readport ?
- ISD::READPORT : ISD::READIO, VTs, Ops);
-
- setValue(&I, Tmp);
- DAG.setRoot(Tmp.getValue(1));
- return;
- }
- case Intrinsic::writeport:
- case Intrinsic::writeio:
- DAG.setRoot(DAG.getNode(F->getIntrinsicID() == Intrinsic::writeport ?
- ISD::WRITEPORT : ISD::WRITEIO, MVT::Other,
- getRoot(), getValue(I.getOperand(1)),
- getValue(I.getOperand(2))));
- return;
- case Intrinsic::dbg_stoppoint:
- case Intrinsic::dbg_region_start:
- case Intrinsic::dbg_region_end:
- case Intrinsic::dbg_func_start:
- case Intrinsic::dbg_declare:
- if (I.getType() != Type::VoidTy)
- setValue(&I, DAG.getNode(ISD::UNDEF, TLI.getValueType(I.getType())));
- return;
-
- case Intrinsic::isunordered:
- setValue(&I, DAG.getSetCC(MVT::i1,getValue(I.getOperand(1)),
- getValue(I.getOperand(2)), ISD::SETUO));
- return;
-
- case Intrinsic::sqrt:
- setValue(&I, DAG.getNode(ISD::FSQRT,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
- return;
-
- case Intrinsic::pcmarker:
- Tmp = getValue(I.getOperand(1));
- DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
- return;
- case Intrinsic::cttz:
- setValue(&I, DAG.getNode(ISD::CTTZ,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
- return;
- case Intrinsic::ctlz:
- setValue(&I, DAG.getNode(ISD::CTLZ,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
- return;
- case Intrinsic::ctpop:
- setValue(&I, DAG.getNode(ISD::CTPOP,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
- return;
- default:
- std::cerr << I;
- assert(0 && "This intrinsic is not implemented yet!");
- return;
}
+ }
SDOperand Callee;
if (!RenameFn)
@@ -843,7 +895,7 @@
else
Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
std::vector<std::pair<SDOperand, const Type*> > Args;
-
+ Args.reserve(I.getNumOperands());
for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
Value *Arg = I.getOperand(i);
SDOperand ArgNode = getValue(Arg);
@@ -912,6 +964,11 @@
return 0;
}
+SDOperand TargetLowering::LowerReturnTo(SDOperand Chain, SDOperand Op,
+ SelectionDAG &DAG) {
+ return DAG.getNode(ISD::RET, MVT::Other, Chain, Op);
+}
+
SDOperand TargetLowering::LowerVAStart(SDOperand Chain,
SDOperand VAListP, Value *VAListV,
SelectionDAG &DAG) {
@@ -1022,7 +1079,6 @@
// updates dom and loop info.
}
-
bool SelectionDAGISel::runOnFunction(Function &Fn) {
MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
RegMap = MF.getSSARegMap();
@@ -1039,7 +1095,7 @@
if (isa<Constant>(PN->getIncomingValue(i)))
SplitCriticalEdge(PN->getIncomingBlock(i), BB);
}
-
+
FunctionLoweringInfo FuncInfo(TLI, Fn, MF);
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
@@ -1081,104 +1137,45 @@
}
}
-/// IsOnlyUsedInOneBasicBlock - If the specified argument is only used in a
-/// single basic block, return that block. Otherwise, return a null pointer.
-static BasicBlock *IsOnlyUsedInOneBasicBlock(Argument *A) {
- if (A->use_empty()) return 0;
- BasicBlock *BB = cast<Instruction>(A->use_back())->getParent();
- for (Argument::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E;
- ++UI)
- if (isa<PHINode>(*UI) || cast<Instruction>(*UI)->getParent() != BB)
- return 0; // Disagreement among the users?
-
- // Okay, there is a single BB user. Only permit this optimization if this is
- // the entry block, otherwise, we might sink argument loads into loops and
- // stuff. Later, when we have global instruction selection, this won't be an
- // issue clearly.
- if (BB == BB->getParent()->begin())
- return BB;
- return 0;
-}
-
void SelectionDAGISel::
LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL,
std::vector<SDOperand> &UnorderedChains) {
// If this is the entry block, emit arguments.
Function &F = *BB->getParent();
FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+ SDOperand OldRoot = SDL.DAG.getRoot();
+ std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
- if (BB == &F.front()) {
- SDOperand OldRoot = SDL.DAG.getRoot();
-
- std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
-
- // If there were side effects accessing the argument list, do not do
- // anything special.
- if (OldRoot != SDL.DAG.getRoot()) {
- unsigned a = 0;
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
- AI != E; ++AI,++a)
- if (!AI->use_empty()) {
- SDL.setValue(AI, Args[a]);
-
- SDOperand Copy =
- CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
- UnorderedChains.push_back(Copy);
- }
- } else {
- // Otherwise, if any argument is only accessed in a single basic block,
- // emit that argument only to that basic block.
- unsigned a = 0;
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
- AI != E; ++AI,++a)
- if (!AI->use_empty()) {
- if (BasicBlock *BBU = IsOnlyUsedInOneBasicBlock(AI)) {
- FuncInfo.BlockLocalArguments.insert(std::make_pair(BBU,
- std::make_pair(AI, a)));
- } else {
- SDL.setValue(AI, Args[a]);
- SDOperand Copy =
- CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
- UnorderedChains.push_back(Copy);
- }
- }
- }
-
- // Next, if the function has live ins that need to be copied into vregs,
- // emit the copies now, into the top of the block.
- MachineFunction &MF = SDL.DAG.getMachineFunction();
- if (MF.livein_begin() != MF.livein_end()) {
- SSARegMap *RegMap = MF.getSSARegMap();
- const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
- for (MachineFunction::livein_iterator LI = MF.livein_begin(),
- E = MF.livein_end(); LI != E; ++LI)
- if (LI->second)
- MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
- LI->first, RegMap->getRegClass(LI->second));
- }
+ unsigned a = 0;
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI, ++a)
+ if (!AI->use_empty()) {
+ SDL.setValue(AI, Args[a]);
- // Finally, if the target has anything special to do, allow it to do so.
- EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
- }
-
- // See if there are any block-local arguments that need to be emitted in this
- // block.
-
- if (!FuncInfo.BlockLocalArguments.empty()) {
- std::multimap<BasicBlock*, std::pair<Argument*, unsigned> >::iterator BLAI =
- FuncInfo.BlockLocalArguments.lower_bound(BB);
- if (BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB) {
- // Lower the arguments into this block.
- std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
-
- // Set up the value mapping for the local arguments.
- for (; BLAI != FuncInfo.BlockLocalArguments.end() && BLAI->first == BB;
- ++BLAI)
- SDL.setValue(BLAI->second.first, Args[BLAI->second.second]);
-
- // Any dead arguments will just be ignored here.
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (FuncInfo.ValueMap.count(AI)) {
+ SDOperand Copy =
+ CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
+ UnorderedChains.push_back(Copy);
+ }
}
+
+ // Next, if the function has live ins that need to be copied into vregs,
+ // emit the copies now, into the top of the block.
+ MachineFunction &MF = SDL.DAG.getMachineFunction();
+ if (MF.livein_begin() != MF.livein_end()) {
+ SSARegMap *RegMap = MF.getSSARegMap();
+ const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
+ for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+ E = MF.livein_end(); LI != E; ++LI)
+ if (LI->second)
+ MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+ LI->first, RegMap->getRegClass(LI->second));
}
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
}
@@ -1189,8 +1186,9 @@
std::vector<SDOperand> UnorderedChains;
- // Lower any arguments needed in this block.
- LowerArguments(LLVMBB, SDL, UnorderedChains);
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &LLVMBB->getParent()->front())
+ LowerArguments(LLVMBB, SDL, UnorderedChains);
BB = FuncInfo.MBBMap[LLVMBB];
SDL.setCurrentBasicBlock(BB);
@@ -1269,7 +1267,18 @@
// Turn all of the unordered chains into one factored node.
if (!UnorderedChains.empty()) {
- UnorderedChains.push_back(SDL.getRoot());
+ SDOperand Root = SDL.getRoot();
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = UnorderedChains.size();
+ for (; i != e; ++i) {
+ assert(UnorderedChains[i].Val->getNumOperands() > 1);
+ if (UnorderedChains[i].Val->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ UnorderedChains.push_back(Root);
+ }
DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, UnorderedChains));
}
More information about the llvm-commits
mailing list