[llvm-branch-commits] [llvm-branch] r92854 - in /llvm/branches/Apple/Zoidberg: include/llvm/CodeGen/SelectionDAGISel.h include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp lib/CodeGen/SelectionDAG/TargetLowering.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Instr64bit.td test/CodeGen/Blackfin/promote-logic.ll test/CodeGen/CellSPU/mul_ops.ll test/CodeGen/SystemZ/2009-06-02-Rotate.ll
Evan Cheng
evan.cheng at apple.com
Wed Jan 6 12:14:11 PST 2010
Author: evancheng
Date: Wed Jan 6 14:14:11 2010
New Revision: 92854
URL: http://llvm.org/viewvc/llvm-project?rev=92854&view=rev
Log:
Merge 91717, 92513, 92694, 92849, 92850.
Modified:
llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h
llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td
llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll
llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll
llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
Modified: llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h (original)
+++ llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h Wed Jan 6 14:14:11 2010
@@ -131,6 +131,7 @@
void CodeGenAndEmitDAG();
void LowerArguments(BasicBlock *BB);
+ void ShrinkDemandedOps();
void ComputeLiveOutVRegInfo();
void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB);
Modified: llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h (original)
+++ llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h Wed Jan 6 14:14:11 2010
@@ -768,10 +768,12 @@
/// that want to combine
struct TargetLoweringOpt {
SelectionDAG &DAG;
+ bool ShrinkOps;
SDValue Old;
SDValue New;
- explicit TargetLoweringOpt(SelectionDAG &InDAG) : DAG(InDAG) {}
+ explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) :
+ DAG(InDAG), ShrinkOps(Shrink) {}
bool CombineTo(SDValue O, SDValue N) {
Old = O;
@@ -1486,7 +1488,7 @@
}
/// isZExtFree - Return true if any actual instruction that defines a
- /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
/// register. This does not necessarily include registers defined in
/// unknown ways, such as incoming arguments, or copies from unknown
/// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jan 6 14:14:11 2010
@@ -1688,18 +1688,18 @@
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
//
// do not sink logical op inside of a vector extend, since it may combine
// into a vsetcc.
- if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+ (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
- N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
- (!LegalOperations ||
- TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
@@ -1839,6 +1839,7 @@
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+
// fold (zext_inreg (extload x)) -> (zextload x)
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -1885,48 +1886,89 @@
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
- if (N1C && N0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
- LN0->isUnindexed() && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !LN0->isVolatile()) {
- EVT ExtVT = MVT::Other;
+ LN0->isUnindexed() && N0.hasOneUse()) {
uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
- ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-
- EVT LoadedVT = LN0->getMemoryVT();
-
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
- EVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to add an offset to the pointer to
- // load the correct bytes. For little endian systems, we merely need to
- // read fewer bytes from the same pointer.
- unsigned LVTStoreBytes = LoadedVT.getStoreSize();
- unsigned EVTStoreBytes = ExtVT.getStoreSize();
- unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- unsigned Alignment = LN0->getAlignment();
- SDValue NewPtr = LN0->getBasePtr();
-
- if (TLI.isBigEndian()) {
- NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
- Alignment = MinAlign(Alignment, PtrOff);
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ if (HasAnyExt) {
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(),
+ LN0->getValueType(0),
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ } else {
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else if (!LN0->isVolatile()) {
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ if (TLI.isBigEndian()) {
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+ if (HasAnyExt) {
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(),
+ LN0->getValueType(0),
+ LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), Alignment);
+ AddToWorkList(N);
+ CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ } else {
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), Alignment);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
}
-
- AddToWorkList(NewPtr.getNode());
- SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
- NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
- ExtVT, LN0->isVolatile(), Alignment);
- AddToWorkList(N);
- CombineTo(N0.getNode(), Load, Load.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
@@ -2755,7 +2797,42 @@
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However when after the source operand of SRL is optimized into AND, the SRL
+ // itself may not be optimized further. Look for it and add the BRCOND into
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look pass the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
@@ -3171,7 +3248,10 @@
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,7 +3757,7 @@
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
else
// if the source and dest are the same type, we can drop both the extend
- // and the truncate
+ // and the truncate.
return N0.getOperand(0);
}
@@ -4488,6 +4568,13 @@
N1.getOperand(0), N1.getOperand(1), N2);
}
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+ // Look pass truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
@@ -4499,7 +4586,7 @@
// into
//
// %a = ...
- // %b = and %a, 2
+ // %b = and i32 %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
@@ -4510,7 +4597,6 @@
SDValue Op1 = N1.getOperand(1);
if (Op0.getOpcode() == ISD::AND &&
- Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
@@ -4525,12 +4611,21 @@
Op0, DAG.getConstant(0, Op0.getValueType()),
ISD::SETNE);
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
// Replace the uses of SRL with SETCC
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
- return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
- MVT::Other, Chain, SetCC, N2);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Jan 6 14:14:11 2010
@@ -2626,6 +2626,8 @@
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
break;
case ISD::FP_ROUND_INREG: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Wed Jan 6 14:14:11 2010
@@ -424,6 +424,78 @@
SDL->clear();
}
+/// ShrinkDemandedOps - A late transformation pass that shrink expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+void SelectionDAGISel::ShrinkDemandedOps() {
+ SmallVector<SDNode*, 128> Worklist;
+
+ // Add all the dag nodes to the worklist.
+ Worklist.reserve(CurDAG->allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I)
+ Worklist.push_back(I);
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+
+ if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+
+ // Run ShrinkDemandedOp on scalar binary operations.
+ if (N->getNumValues() == 1 &&
+ N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+ KnownZero, KnownOne, TLO)) {
+ // Revisit the node.
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+ Worklist.end());
+ Worklist.push_back(N);
+
+ // Replace the old value with the new one.
+ DEBUG(errs() << "\nReplacing ";
+ TLO.Old.getNode()->dump(CurDAG);
+ errs() << "\nWith: ";
+ TLO.New.getNode()->dump(CurDAG);
+ errs() << '\n');
+
+ Worklist.push_back(TLO.New.getNode());
+ CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+ if (TLO.Old.getNode()->use_empty()) {
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+ i != e; ++i) {
+ SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
+ if (OpNode->hasOneUse()) {
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+ OpNode),
+ Worklist.end());
+ Worklist.push_back(TLO.Old.getNode()->getOperand(i).getNode());
+ }
+ }
+
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+ TLO.Old.getNode()),
+ Worklist.end());
+ CurDAG->DeleteNode(TLO.Old.getNode());
+ }
+ }
+ }
+ }
+}
+
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@@ -597,8 +669,10 @@
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
+ ShrinkDemandedOps();
ComputeLiveOutVRegInfo();
+ }
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Jan 6 14:14:11 2010
@@ -986,7 +986,7 @@
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1020,7 +1020,7 @@
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1045,7 +1045,7 @@
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@@ -1476,7 +1476,7 @@
KnownOne2, TLO, Depth+1))
return true;
// See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
}
// FALL THROUGH
@@ -1872,7 +1872,9 @@
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize() ?
@@ -1880,16 +1882,18 @@
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
if (isPowerOf2_64(AndRHS->getZExtValue())) {
- return DAG.getNode(ISD::SRL, dl, VT, N0,
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
- ShiftTy));
+ ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
if (C1.isPowerOf2()) {
- return DAG.getNode(ISD::SRL, dl, VT, N0,
- DAG.getConstant(C1.logBase2(), ShiftTy));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
}
}
}
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp Wed Jan 6 14:14:11 2010
@@ -978,6 +978,7 @@
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MEMBARRIER);
setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -5681,6 +5682,56 @@
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ SDValue LHS, RHS;
+ if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op010C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+ if (Op010C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1).getOperand(1);
+ }
+ } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op000C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+ if (Op000C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(1);
+ RHS = Op0.getOperand(0).getOperand(1);
+ }
+ } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+ SDValue AndLHS = Op0.getOperand(0);
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i16 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ if (LHS.getValueType() == MVT::i8)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
@@ -5688,6 +5739,7 @@
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
@@ -5696,48 +5748,9 @@
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue LHS, RHS;
- if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op010C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
- if (Op010C->getZExtValue() == 1) {
- LHS = Op0.getOperand(0);
- RHS = Op0.getOperand(1).getOperand(1);
- }
- } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op000C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
- if (Op000C->getZExtValue() == 1) {
- LHS = Op0.getOperand(1);
- RHS = Op0.getOperand(0).getOperand(1);
- }
- } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
- SDValue AndLHS = Op0.getOperand(0);
- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
- LHS = AndLHS.getOperand(0);
- RHS = AndLHS.getOperand(1);
- }
- }
-
- if (LHS.getNode()) {
- // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i16 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- if (LHS.getValueType() == MVT::i8)
- LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (LHS.getValueType() != RHS.getValueType())
- RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, MVT::i8), BT);
- }
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
}
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
@@ -5936,6 +5949,23 @@
}
if (addTest) {
+ // Look pass the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
@@ -6097,6 +6127,23 @@
}
if (addTest) {
+ // Look pass the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
@@ -9117,6 +9164,64 @@
return SDValue();
}
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 || !Subtarget->is64Bit())
+ return SDValue();
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ if (SumC->getSExtValue() == 64 &&
+ ShAmt1.getOperand(1) == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
+
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -9379,6 +9484,7 @@
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td Wed Jan 6 14:14:11 2010
@@ -2098,24 +2098,7 @@
def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
-// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
-def : Pat<(or (srl GR64:$src1, CL:$amt),
- (shl GR64:$src2, (sub 64, CL:$amt))),
- (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
- (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
- (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- addr:$dst),
- (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
+// Double shift patterns
def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
@@ -2123,24 +2106,6 @@
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
(SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
-def : Pat<(or (shl GR64:$src1, CL:$amt),
- (srl GR64:$src2, (sub 64, CL:$amt))),
- (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
- (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
- (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- addr:$dst),
- (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
(SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
Modified: llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll (original)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll Wed Jan 6 14:14:11 2010
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=bfin > %t
+; XFAIL: *
; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
; operation after LegalizeOps.
Modified: llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll (original)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll Wed Jan 6 14:14:11 2010
@@ -11,7 +11,6 @@
; RUN: grep shli %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
; RUN: grep ila %t1.s | count 2
-; RUN: grep xsbh %t1.s | count 4
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
Modified: llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll?rev=92854&r1=92853&r2=92854&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll (original)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll Wed Jan 6 14:14:11 2010
@@ -5,8 +5,8 @@
define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
- %shl = shl i32 %x, 0 ; <i32> [#uses=1]
- %sub = sub i32 32, 0 ; <i32> [#uses=1]
+ %shl = shl i32 %x, 1 ; <i32> [#uses=1]
+ %sub = sub i32 32, 1 ; <i32> [#uses=1]
%shr = lshr i32 %x, %sub ; <i32> [#uses=1]
%or = or i32 %shr, %shl ; <i32> [#uses=1]
ret i32 %or
More information about the llvm-branch-commits
mailing list