[llvm-branch-commits] [llvm-branch] r92854 - in /llvm/branches/Apple/Zoidberg: include/llvm/CodeGen/SelectionDAGISel.h include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp lib/CodeGen/SelectionDAG/TargetLowering.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Instr64bit.td test/CodeGen/Blackfin/promote-logic.ll test/CodeGen/CellSPU/mul_ops.ll test/CodeGen/SystemZ/2009-06-02-Rotate.ll

Evan Cheng evan.cheng at apple.com
Wed Jan 6 12:14:11 PST 2010


Author: evancheng
Date: Wed Jan  6 14:14:11 2010
New Revision: 92854

URL: http://llvm.org/viewvc/llvm-project?rev=92854&view=rev
Log:
Merge 91717, 92513, 92694, 92849, 92850.

Modified:
    llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h
    llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h
    llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
    llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td
    llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll
    llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll
    llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll

Modified: llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h (original)
+++ llvm/branches/Apple/Zoidberg/include/llvm/CodeGen/SelectionDAGISel.h Wed Jan  6 14:14:11 2010
@@ -131,6 +131,7 @@
   void CodeGenAndEmitDAG();
   void LowerArguments(BasicBlock *BB);
   
+  void ShrinkDemandedOps();
   void ComputeLiveOutVRegInfo();
 
   void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB);

Modified: llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h (original)
+++ llvm/branches/Apple/Zoidberg/include/llvm/Target/TargetLowering.h Wed Jan  6 14:14:11 2010
@@ -768,10 +768,12 @@
   /// that want to combine 
   struct TargetLoweringOpt {
     SelectionDAG &DAG;
+    bool ShrinkOps;
     SDValue Old;
     SDValue New;
 
-    explicit TargetLoweringOpt(SelectionDAG &InDAG) : DAG(InDAG) {}
+    explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) :
+      DAG(InDAG), ShrinkOps(Shrink) {}
     
     bool CombineTo(SDValue O, SDValue N) { 
       Old = O; 
@@ -1486,7 +1488,7 @@
   }
 
   /// isZExtFree - Return true if any actual instruction that defines a
-  /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
   /// register. This does not necessarily include registers defined in
   /// unknown ways, such as incoming arguments, or copies from unknown
   /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jan  6 14:14:11 2010
@@ -1688,18 +1688,18 @@
   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
   //
   // do not sink logical op inside of a vector extend, since it may combine
   // into a vsetcc.
-  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND  ||
        N0.getOpcode() == ISD::SIGN_EXTEND ||
-       (N0.getOpcode() == ISD::TRUNCATE &&
-        !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
       !VT.isVector() &&
-      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
-      (!LegalOperations ||
-       TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
+      Op0VT == N1.getOperand(0).getValueType() &&
+      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
     SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                  N0.getOperand(0).getValueType(),
                                  N0.getOperand(0), N1.getOperand(0));
@@ -1839,6 +1839,7 @@
   if (!VT.isVector() &&
       SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
+
   // fold (zext_inreg (extload x)) -> (zextload x)
   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -1885,48 +1886,89 @@
 
   // fold (and (load x), 255) -> (zextload x, i8)
   // fold (and (extload x, i16), 255) -> (zextload x, i8)
-  if (N1C && N0.getOpcode() == ISD::LOAD) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() &&
-        // Do not change the width of a volatile load.
-        !LN0->isVolatile()) {
-      EVT ExtVT = MVT::Other;
+        LN0->isUnindexed() && N0.hasOneUse()) {
       uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
-      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
-        ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-
-      EVT LoadedVT = LN0->getMemoryVT();
-
-      // Do not generate loads of non-round integer types since these can
-      // be expensive (and would be wrong if the type is not byte sized).
-      if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
-          (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
-        EVT PtrType = N0.getOperand(1).getValueType();
-
-        // For big endian targets, we need to add an offset to the pointer to
-        // load the correct bytes.  For little endian systems, we merely need to
-        // read fewer bytes from the same pointer.
-        unsigned LVTStoreBytes = LoadedVT.getStoreSize();
-        unsigned EVTStoreBytes = ExtVT.getStoreSize();
-        unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-        unsigned Alignment = LN0->getAlignment();
-        SDValue NewPtr = LN0->getBasePtr();
-
-        if (TLI.isBigEndian()) {
-          NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
-                               NewPtr, DAG.getConstant(PtrOff, PtrType));
-          Alignment = MinAlign(Alignment, PtrOff);
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
+
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          if (HasAnyExt) {
+            SDValue Load = 
+              DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(),
+                             LN0->getValueType(0),
+                             LN0->getChain(), LN0->getBasePtr(),
+                             LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                             ExtVT, LN0->isVolatile(), LN0->getAlignment());
+            AddToWorkList(N);
+            CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1));
+            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          } else {
+            SDValue Load =
+              DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+                             LN0->getChain(), LN0->getBasePtr(),
+                             LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                             ExtVT, LN0->isVolatile(), LN0->getAlignment());
+            AddToWorkList(N);
+            CombineTo(N0.getNode(), Load, Load.getValue(1));
+            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          }
+        } else if (!LN0->isVolatile()) {
+          // Do not change the width of a volatile load.
+          // Do not generate loads of non-round integer types since these can
+          // be expensive (and would be wrong if the type is not byte sized).
+          if (LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+              (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+            EVT PtrType = LN0->getOperand(1).getValueType();
+
+            // For big endian targets, we need to add an offset to the pointer
+            // to load the correct bytes.  For little endian systems, we merely
+            // need to read fewer bytes from the same pointer.
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            unsigned Alignment = LN0->getAlignment();
+            SDValue NewPtr = LN0->getBasePtr();
+
+            if (TLI.isBigEndian()) {
+              NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                   NewPtr, DAG.getConstant(PtrOff, PtrType));
+              Alignment = MinAlign(Alignment, PtrOff);
+            }
+
+            AddToWorkList(NewPtr.getNode());
+            if (HasAnyExt) {
+              SDValue Load =
+                DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(),
+                               LN0->getValueType(0),
+                               LN0->getChain(), NewPtr,
+                               LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                               ExtVT, LN0->isVolatile(), Alignment);
+              AddToWorkList(N);
+              CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1));
+              return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+            } else {
+              SDValue Load =
+                DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+                               LN0->getChain(), NewPtr,
+                               LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                               ExtVT, LN0->isVolatile(), Alignment);
+              AddToWorkList(N);
+              CombineTo(N0.getNode(), Load, Load.getValue(1));
+              return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+            }
+          }
         }
-
-        AddToWorkList(NewPtr.getNode());
-        SDValue Load =
-          DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
-                         NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                         ExtVT, LN0->isVolatile(), Alignment);
-        AddToWorkList(N);
-        CombineTo(N0.getNode(), Load, Load.getValue(1));
-        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
       }
     }
   }
@@ -2755,7 +2797,42 @@
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
-  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+  if (N1C) {
+    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSRL.getNode())
+      return NewSRL;
+  }
+
+  // Here is a common situation. We want to optimize:
+  //
+  //   %a = ...
+  //   %b = and i32 %a, 2
+  //   %c = srl i32 %b, 1
+  //   brcond i32 %c ...
+  //
+  // into
+  // 
+  //   %a = ...
+  //   %b = and %a, 2
+  //   %c = setcc ne %b, 0
+  //   brcond %c ...
+  //
+  // However, after the source operand of SRL is optimized into AND, the SRL
+  // itself may not be optimized further. Look for it and add the BRCOND into
+  // the worklist.
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look past the truncate.
+      Use = *Use->use_begin();
+      if (Use->getOpcode() == ISD::BRCOND)
+        AddToWorkList(Use);
+    }
+  }
+
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
@@ -3171,7 +3248,10 @@
 
   // fold (zext (truncate x)) -> (and x, mask)
   if (N0.getOpcode() == ISD::TRUNCATE &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
     SDValue Op = N0.getOperand(0);
     if (Op.getValueType().bitsLT(VT)) {
       Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,7 +3757,7 @@
       return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
     else
       // if the source and dest are the same type, we can drop both the extend
-      // and the truncate
+      // and the truncate.
       return N0.getOperand(0);
   }
 
@@ -4488,6 +4568,13 @@
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
+  SDNode *Trunc = 0;
+  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look past the truncate.
+    Trunc = N1.getNode();
+    N1 = N1.getOperand(0);
+  }
+
   if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
     // Match this pattern so that we can generate simpler code:
     //
@@ -4499,7 +4586,7 @@
     // into
     // 
     //   %a = ...
-    //   %b = and %a, 2
+    //   %b = and i32 %a, 2
     //   %c = setcc eq %b, 0
     //   brcond %c ...
     //
@@ -4510,7 +4597,6 @@
     SDValue Op1 = N1.getOperand(1);
 
     if (Op0.getOpcode() == ISD::AND &&
-        Op0.hasOneUse() &&
         Op1.getOpcode() == ISD::Constant) {
       SDValue AndOp1 = Op0.getOperand(1);
 
@@ -4525,12 +4611,21 @@
                          Op0, DAG.getConstant(0, Op0.getValueType()),
                          ISD::SETNE);
 
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
           // Replace the uses of SRL with SETCC
           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
           removeFromWorkList(N1.getNode());
           DAG.DeleteNode(N1.getNode());
-          return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
-                             MVT::Other, Chain, SetCC, N2);
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
         }
       }
     }

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Jan  6 14:14:11 2010
@@ -2626,6 +2626,8 @@
     // size of the value, the shift/rotate count is guaranteed to be zero.
     if (VT == MVT::i1)
       return N1;
+    if (N2C && N2C->isNullValue())
+      return N1;
     break;
   case ISD::FP_ROUND_INREG: {
     EVT EVT = cast<VTSDNode>(N2)->getVT();

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Wed Jan  6 14:14:11 2010
@@ -424,6 +424,78 @@
   SDL->clear();
 }
 
+/// ShrinkDemandedOps - A late transformation pass that shrinks expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+void SelectionDAGISel::ShrinkDemandedOps() {
+  SmallVector<SDNode*, 128> Worklist;
+
+  // Add all the dag nodes to the worklist.
+  Worklist.reserve(CurDAG->allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ++I)
+    Worklist.push_back(I);
+
+  APInt Mask;
+  APInt KnownZero;
+  APInt KnownOne;
+
+  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.back();
+    Worklist.pop_back();
+
+    if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+
+    // Run ShrinkDemandedOp on scalar binary operations.
+    if (N->getNumValues() == 1 &&
+        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+      DebugLoc dl = N->getDebugLoc();
+      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
+      APInt KnownZero, KnownOne;
+      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                   KnownZero, KnownOne, TLO)) {
+        // Revisit the node.
+        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                       Worklist.end());
+        Worklist.push_back(N);
+
+        // Replace the old value with the new one.
+        DEBUG(errs() << "\nReplacing "; 
+              TLO.Old.getNode()->dump(CurDAG);
+              errs() << "\nWith: ";
+              TLO.New.getNode()->dump(CurDAG);
+              errs() << '\n');
+
+        Worklist.push_back(TLO.New.getNode());
+        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+        if (TLO.Old.getNode()->use_empty()) {
+          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+               i != e; ++i) {
+            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
+            if (OpNode->hasOneUse()) {
+              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                         OpNode),
+                             Worklist.end());
+              Worklist.push_back(TLO.Old.getNode()->getOperand(i).getNode());
+            }
+          }
+
+          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                     TLO.Old.getNode()),
+                         Worklist.end());
+          CurDAG->DeleteNode(TLO.Old.getNode());
+        }
+      }
+    }
+  }
+}
+
 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
   SmallPtrSet<SDNode*, 128> VisitedNodes;
   SmallVector<SDNode*, 128> Worklist;
@@ -597,8 +669,10 @@
 
   if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
 
-  if (OptLevel != CodeGenOpt::None)
+  if (OptLevel != CodeGenOpt::None) {
+    ShrinkDemandedOps();
     ComputeLiveOutVRegInfo();
+  }
 
   // Third, instruction select all of the operations to machine code, adding the
   // code to the MachineBasicBlock.

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Jan  6 14:14:11 2010
@@ -986,7 +986,7 @@
     if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
       return true;
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1020,7 +1020,7 @@
     if (TLO.ShrinkDemandedConstant(Op, NewMask))
       return true;
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1045,7 +1045,7 @@
     if ((KnownZero2 & NewMask) == NewMask)
       return TLO.CombineTo(Op, Op.getOperand(1));
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // If all of the unknown bits are known to be zero on one side or the other
@@ -1476,7 +1476,7 @@
                              KnownOne2, TLO, Depth+1))
       return true;
     // See if the operation should be performed at a smaller bit width.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
   }
   // FALL THROUGH
@@ -1872,7 +1872,9 @@
 
     // Fold bit comparisons when we can.
     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
-        VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+        (VT == N0.getValueType() ||
+         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+        N0.getOpcode() == ISD::AND)
       if (ConstantSDNode *AndRHS =
                   dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
         EVT ShiftTy = DCI.isBeforeLegalize() ?
@@ -1880,16 +1882,18 @@
         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
           // Perform the xform if the AND RHS is a single bit.
           if (isPowerOf2_64(AndRHS->getZExtValue())) {
-            return DAG.getNode(ISD::SRL, dl, VT, N0,
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                              DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
                                 DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
-                                                ShiftTy));
+                                                ShiftTy)));
           }
         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
           // (X & 8) == 8  -->  (X & 8) >> 3
           // Perform the xform if C1 is a single bit.
           if (C1.isPowerOf2()) {
-            return DAG.getNode(ISD::SRL, dl, VT, N0,
-                                DAG.getConstant(C1.logBase2(), ShiftTy));
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                                      DAG.getConstant(C1.logBase2(), ShiftTy)));
           }
         }
       }

Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86ISelLowering.cpp Wed Jan  6 14:14:11 2010
@@ -978,6 +978,7 @@
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::MEMBARRIER);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -5681,6 +5682,56 @@
   return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
 }
 
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
+                         DebugLoc dl, SelectionDAG &DAG) {
+  SDValue LHS, RHS;
+  if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *Op010C =
+        dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+      if (Op010C->getZExtValue() == 1) {
+        LHS = Op0.getOperand(0);
+        RHS = Op0.getOperand(1).getOperand(1);
+      }
+  } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *Op000C =
+        dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+      if (Op000C->getZExtValue() == 1) {
+        LHS = Op0.getOperand(1);
+        RHS = Op0.getOperand(0).getOperand(1);
+      }
+  } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+    SDValue AndLHS = Op0.getOperand(0);
+    if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+      LHS = AndLHS.getOperand(0);
+      RHS = AndLHS.getOperand(1);
+    }
+  }
+
+  if (LHS.getNode()) {
+    // If LHS is i8, promote it to i32 with any_extend.  There is no i8 BT
+    // instruction.  Since the shift amount is in-range-or-undefined, we know
+    // that doing a bittest on the i32 value is ok.  We extend to i32 because
+    // the encoding for the i16 version is larger than the i32 version.
+    if (LHS.getValueType() == MVT::i8)
+      LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+    // If the operand types disagree, extend the shift amount to match.  Since
+    // BT ignores high bits (like shifts) we can use anyextend.
+    if (LHS.getValueType() != RHS.getValueType())
+      RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+    SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+    unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                       DAG.getConstant(Cond, MVT::i8), BT);
+  }
+
+  return SDValue();
+}
+
 SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
   SDValue Op0 = Op.getOperand(0);
@@ -5688,6 +5739,7 @@
   DebugLoc dl = Op.getDebugLoc();
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 
+  // Optimize to BT if possible.
   // Lower (X & (1 << N)) == 0 to BT(X, N).
   // Lower ((X >>u N) & 1) != 0 to BT(X, N).
   // Lower ((X >>s N) & 1) != 0 to BT(X, N).
@@ -5696,48 +5748,9 @@
       Op1.getOpcode() == ISD::Constant &&
       cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
       (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-    SDValue LHS, RHS;
-    if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
-      if (ConstantSDNode *Op010C =
-            dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
-        if (Op010C->getZExtValue() == 1) {
-          LHS = Op0.getOperand(0);
-          RHS = Op0.getOperand(1).getOperand(1);
-        }
-    } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
-      if (ConstantSDNode *Op000C =
-            dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
-        if (Op000C->getZExtValue() == 1) {
-          LHS = Op0.getOperand(1);
-          RHS = Op0.getOperand(0).getOperand(1);
-        }
-    } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
-      ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
-      SDValue AndLHS = Op0.getOperand(0);
-      if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
-        LHS = AndLHS.getOperand(0);
-        RHS = AndLHS.getOperand(1);
-      }
-    }
-
-    if (LHS.getNode()) {
-      // If LHS is i8, promote it to i16 with any_extend.  There is no i8 BT
-      // instruction.  Since the shift amount is in-range-or-undefined, we know
-      // that doing a bittest on the i16 value is ok.  We extend to i32 because
-      // the encoding for the i16 version is larger than the i32 version.
-      if (LHS.getValueType() == MVT::i8)
-        LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
-
-      // If the operand types disagree, extend the shift amount to match.  Since
-      // BT ignores high bits (like shifts) we can use anyextend.
-      if (LHS.getValueType() != RHS.getValueType())
-        RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
-
-      SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
-      unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
-      return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                         DAG.getConstant(Cond, MVT::i8), BT);
-    }
+    SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+    if (NewSetCC.getNode())
+      return NewSetCC;
   }
 
   bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
@@ -5936,6 +5949,23 @@
   }
 
   if (addTest) {
+    // Look past the truncate.
+    if (Cond.getOpcode() == ISD::TRUNCATE)
+      Cond = Cond.getOperand(0);
+
+    // We know the result of AND is compared against zero. Try to match
+    // it to BT.
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+      if (NewSetCC.getNode()) {
+        CC = NewSetCC.getOperand(0);
+        Cond = NewSetCC.getOperand(1);
+        addTest = false;
+      }
+    }
+  }
+
+  if (addTest) {
     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
     Cond = EmitTest(Cond, X86::COND_NE, DAG);
   }
@@ -6097,6 +6127,23 @@
   }
 
   if (addTest) {
+    // Look past the truncate.
+    if (Cond.getOpcode() == ISD::TRUNCATE)
+      Cond = Cond.getOperand(0);
+
+    // We know the result of AND is compared against zero. Try to match
+    // it to BT.
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+      if (NewSetCC.getNode()) {
+        CC = NewSetCC.getOperand(0);
+        Cond = NewSetCC.getOperand(1);
+        addTest = false;
+      }
+    }
+  }
+
+  if (addTest) {
     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
     Cond = EmitTest(Cond, X86::COND_NE, DAG);
   }
@@ -9117,6 +9164,64 @@
   return SDValue();
 }
 
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+                                const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 || !Subtarget->is64Bit())
+    return SDValue();
+
+  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+    std::swap(N0, N1);
+  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  SDValue ShAmt0 = N0.getOperand(1);
+  if (ShAmt0.getValueType() != MVT::i8)
+    return SDValue();
+  SDValue ShAmt1 = N1.getOperand(1);
+  if (ShAmt1.getValueType() != MVT::i8)
+    return SDValue();
+  if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+    ShAmt0 = ShAmt0.getOperand(0);
+  if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+    ShAmt1 = ShAmt1.getOperand(0);
+
+  DebugLoc DL = N->getDebugLoc();
+  unsigned Opc = X86ISD::SHLD;
+  SDValue Op0 = N0.getOperand(0);
+  SDValue Op1 = N1.getOperand(0);
+  if (ShAmt0.getOpcode() == ISD::SUB) {
+    Opc = X86ISD::SHRD;
+    std::swap(Op0, Op1);
+    std::swap(ShAmt0, ShAmt1);
+  }
+
+  if (ShAmt1.getOpcode() == ISD::SUB) {
+    SDValue Sum = ShAmt1.getOperand(0);
+    if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+      if (SumC->getSExtValue() == 64 &&
+          ShAmt1.getOperand(1) == ShAmt0)
+        return DAG.getNode(Opc, DL, VT,
+                           Op0, Op1,
+                           DAG.getNode(ISD::TRUNCATE, DL,
+                                       MVT::i8, ShAmt0));
+    }
+  } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+    ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+    if (ShAmt0C &&
+        ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)
+      return DAG.getNode(Opc, DL, VT,
+                         N0.getOperand(0), N1.getOperand(0),
+                         DAG.getNode(ISD::TRUNCATE, DL,
+                                       MVT::i8, ShAmt0));
+  }
+
+  return SDValue();
+}
+
 /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
 static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
@@ -9379,6 +9484,7 @@
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);
+  case ISD::OR:             return PerformOrCombine(N, DAG, Subtarget);
   case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
   case X86ISD::FOR:         return PerformFORCombine(N, DAG);

Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Instr64bit.td Wed Jan  6 14:14:11 2010
@@ -2098,24 +2098,7 @@
 def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
           (SAR64mCL addr:$dst)>;
 
-// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
-def : Pat<(or (srl GR64:$src1, CL:$amt),
-              (shl GR64:$src2, (sub 64, CL:$amt))),
-          (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
-                     (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
-          (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
-              (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
-          (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
-                     (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
-                 addr:$dst),
-          (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
+// Double shift patterns
 def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
           (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
 
@@ -2123,24 +2106,6 @@
                        GR64:$src2, (i8 imm:$amt2)), addr:$dst),
           (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
 
-// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
-def : Pat<(or (shl GR64:$src1, CL:$amt),
-              (srl GR64:$src2, (sub 64, CL:$amt))),
-          (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
-                     (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
-          (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
-              (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
-          (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
-                     (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
-                 addr:$dst),
-          (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
 def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
           (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
 

Modified: llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll (original)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/Blackfin/promote-logic.ll Wed Jan  6 14:14:11 2010
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=bfin > %t
+; XFAIL: *
 
 ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
 ; operation after LegalizeOps.

Modified: llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll (original)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/CellSPU/mul_ops.ll Wed Jan  6 14:14:11 2010
@@ -11,7 +11,6 @@
 ; RUN: grep shli    %t1.s | count 4
 ; RUN: grep shlhi   %t1.s | count 4
 ; RUN: grep ila     %t1.s | count 2
-; RUN: grep xsbh    %t1.s | count 4
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 

Modified: llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll?rev=92854&r1=92853&r2=92854&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll (original)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/SystemZ/2009-06-02-Rotate.ll Wed Jan  6 14:14:11 2010
@@ -5,8 +5,8 @@
 
 define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-	%shl = shl i32 %x, 0		; <i32> [#uses=1]
-	%sub = sub i32 32, 0		; <i32> [#uses=1]
+	%shl = shl i32 %x, 1		; <i32> [#uses=1]
+	%sub = sub i32 32, 1		; <i32> [#uses=1]
 	%shr = lshr i32 %x, %sub		; <i32> [#uses=1]
 	%or = or i32 %shr, %shl		; <i32> [#uses=1]
 	ret i32 %or





More information about the llvm-branch-commits mailing list