[llvm-commits] [llvm] r66914 - in /llvm/branches/Apple/Dib: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/X86/README.txt lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/const-select.ll test/CodeGen/X86/mul-legalize.ll test/CodeGen/X86/select-no-cmov.ll

Fri Mar 13 11:08:20 PDT 2009

Author: void
Date: Fri Mar 13 13:08:19 2009
New Revision: 66914

URL: http://llvm.org/viewvc/llvm-project?rev=66914&view=rev
Log:

--- Merging (from foreign repository) r66779 into '.':
U    test/CodeGen/X86/mul-legalize.ll
A    test/CodeGen/X86/const-select.ll
U    lib/CodeGen/SelectionDAG/DAGCombiner.cpp
U    lib/Target/ARM/ARMISelLowering.cpp
U    lib/Target/X86/X86ISelLowering.cpp
U    lib/Target/X86/README.txt

Move 3 "(add (select cc, 0, c), x) -> (select cc, x, (add, x, c))"
related transformations out of target-specific dag combine into the
ARM backend.  These were added by Evan in r37685 with no testcases
and only seems to help ARM (e.g. test/CodeGen/ARM/select_xform.ll).

Add some simple X86-specific (for now) DAG combines that turn things
like cond ? 8 : 0  -> (zext(cond) << 3).  This happens frequently
with the recently added cp constant select optimization, but is a
very general xform.  For example, we now compile the second example
in const-select.ll to:

_test:
        movsd   LCPI2_0, %xmm0
        ucomisd 8(%esp), %xmm0
        seta    %al
        movzbl  %al, %eax
        movl    4(%esp), %ecx
        movsbl  (%ecx,%eax,4), %eax
        ret

instead of:

_test:
        movl    4(%esp), %eax
        leal    4(%eax), %ecx
        movsd   LCPI2_0, %xmm0
        ucomisd 8(%esp), %xmm0
        cmovbe  %eax, %ecx
        movsbl  (%ecx), %eax
        ret

This passes multisource and dejagnu.

--- Merging (from foreign repository) r66868 into '.':
A    test/CodeGen/X86/select-no-cmov.ll
G    lib/Target/X86/X86ISelLowering.cpp

optimize the case of cond ? 42 : 41 and friends.  This compiles the
example to:

_test:
        movl    4(%esp), %eax
        cmpl    $41, (%eax)
        setg    %al
        movzbl  %al, %eax
        orl     $4294967294, %eax
        ret

instead of:

        movl    4(%esp), %eax
        cmpl    $41, (%eax)
        movl    $4294967294, %ecx
        movl    $4294967295, %eax
        cmova   %ecx, %eax
        ret

which is smaller in code size and faster. rdar://6668608

--- Merging (from foreign repository) r66869 into '.':
U    test/CodeGen/X86/select-no-cmov.ll
G    lib/Target/X86/X86ISelLowering.cpp

generalize the previous code to use the full generality of LEA
for i32/i64 expressions (we could also do i16 on cpus where
i16 lea is fast, but I didn't add this).  On the example, we now
generate:

_test:
        movl    4(%esp), %eax
        cmpl    $42, (%eax)
        setl    %al
        movzbl  %al, %eax
        leal    4(%eax,%eax,8), %eax
        ret

instead of:

_test:
        movl    4(%esp), %eax
        cmpl    $41, (%eax)
        movl    $4, %ecx
        movl    $13, %eax
        cmovg   %ecx, %eax
        ret

Added:
    llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll
    llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll
Modified:
    llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp
    llvm/branches/Apple/Dib/lib/Target/X86/README.txt
    llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll

Modified: llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=66914&r1=66913&r2=66914&view=diff

==============================================================================
--- llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Mar 13 13:08:19 2009
@@ -543,8 +543,10 @@
   if (AddTo) {
     // Push the new nodes and any users onto the worklist
     for (unsigned i = 0, e = NumTo; i != e; ++i) {
-      AddToWorkList(To[i].getNode());
-      AddUsersToWorkList(To[i].getNode());
+      if (To[i].getNode()) {
+        AddToWorkList(To[i].getNode());
+        AddUsersToWorkList(To[i].getNode());
+      }
     }
   }
   
@@ -944,65 +946,6 @@
   return SDValue();
 }
 
-static
-SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
-                            SelectionDAG &DAG, const TargetLowering &TLI,
-                            bool LegalOperations) {
-  MVT VT = N->getValueType(0);
-  unsigned Opc = N->getOpcode();
-  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
-  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
-  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
-  ISD::CondCode CC = ISD::SETCC_INVALID;
-
-  if (isSlctCC) {
-    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
-  } else {
-    SDValue CCOp = Slct.getOperand(0);
-    if (CCOp.getOpcode() == ISD::SETCC)
-      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
-  }
-
-  bool DoXform = false;
-  bool InvCC = false;
-  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
-          "Bad input!");
-
-  if (LHS.getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(LHS)->isNullValue()) {
-    DoXform = true;
-  } else if (CC != ISD::SETCC_INVALID &&
-             RHS.getOpcode() == ISD::Constant &&
-             cast<ConstantSDNode>(RHS)->isNullValue()) {
-    std::swap(LHS, RHS);
-    SDValue Op0 = Slct.getOperand(0);
-    MVT OpVT = isSlctCC ? Op0.getValueType() :
-                          Op0.getOperand(0).getValueType();
-    bool isInt = OpVT.isInteger();
-    CC = ISD::getSetCCInverse(CC, isInt);
-
-    if (LegalOperations && !TLI.isCondCodeLegal(CC, OpVT))
-      return SDValue();         // Inverse operator isn't legal.
-
-    DoXform = true;
-    InvCC = true;
-  }
-
-  if (DoXform) {
-    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
-    if (isSlctCC)
-      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
-                             Slct.getOperand(0), Slct.getOperand(1), CC);
-    SDValue CCOp = Slct.getOperand(0);
-    if (InvCC)
-      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
-                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
-    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
-                       CCOp, OtherOp, Result);
-  }
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -1123,16 +1066,6 @@
     if (Result.getNode()) return Result;
   }
 
-  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
-  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N0, N1, DAG, TLI, LegalOperations);
-    if (Result.getNode()) return Result;
-  }
-  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N1, N0, DAG, TLI, LegalOperations);
-    if (Result.getNode()) return Result;
-  }
-
   return SDValue();
 }
 
@@ -1246,11 +1179,6 @@
       N0.getOperand(1).getOperand(1) == N1)
     return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
-  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
-  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N1, N0, DAG, TLI, LegalOperations);
-    if (Result.getNode()) return Result;
-  }
 
   // If either operand of a sub is undef, the result is undef
   if (N0.getOpcode() == ISD::UNDEF)

Modified: llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp?rev=66914&r1=66913&r2=66914&view=diff

==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp Fri Mar 13 13:08:19 2009
@@ -256,7 +256,10 @@
 
   // We have target-specific dag combine patterns for the following nodes:
   // ARMISD::FMRRD  - No need to call setTargetDAGCombine
-  
+  setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::SUB);
+      
+      
   setStackPointerRegisterToSaveRestore(ARM::SP);
   setSchedulingPreference(SchedulingForRegPressure);
   setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
@@ -1562,6 +1565,102 @@
 //                           ARM Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+                            TargetLowering::DAGCombinerInfo &DCI) {
+  
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MVT VT = N->getValueType(0);
+  unsigned Opc = N->getOpcode();
+  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+  ISD::CondCode CC = ISD::SETCC_INVALID;
+
+  if (isSlctCC) {
+    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+  } else {
+    SDValue CCOp = Slct.getOperand(0);
+    if (CCOp.getOpcode() == ISD::SETCC)
+      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+  }
+
+  bool DoXform = false;
+  bool InvCC = false;
+  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+          "Bad input!");
+
+  if (LHS.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(LHS)->isNullValue()) {
+    DoXform = true;
+  } else if (CC != ISD::SETCC_INVALID &&
+             RHS.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHS)->isNullValue()) {
+    std::swap(LHS, RHS);
+    SDValue Op0 = Slct.getOperand(0);
+    MVT OpVT = isSlctCC ? Op0.getValueType() :
+                          Op0.getOperand(0).getValueType();
+    bool isInt = OpVT.isInteger();
+    CC = ISD::getSetCCInverse(CC, isInt);
+
+    if (!TLI.isCondCodeLegal(CC, OpVT))
+      return SDValue();         // Inverse operator isn't legal.
+
+    DoXform = true;
+    InvCC = true;
+  }
+
+  if (DoXform) {
+    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+    if (isSlctCC)
+      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+                             Slct.getOperand(0), Slct.getOperand(1), CC);
+    SDValue CCOp = Slct.getOperand(0);
+    if (InvCC)
+      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
+    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                       CCOp, OtherOp, Result);
+  }
+  return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+static SDValue PerformADDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // added by evan in r37685 with no testcase.
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  
+  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+    if (Result.getNode()) return Result;
+  }
+  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+    if (Result.getNode()) return Result;
+  }
+  
+  return SDValue();
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+static SDValue PerformSUBCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // added by evan in r37685 with no testcase.
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  
+  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+    if (Result.getNode()) return Result;
+  }
+  
+  return SDValue();
+}
+
+
 /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
 static SDValue PerformFMRRDCombine(SDNode *N, 
                                      TargetLowering::DAGCombinerInfo &DCI) {
@@ -1576,6 +1675,8 @@
                                                DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
   default: break;
+  case ISD::ADD:      return PerformADDCombine(N, DCI);
+  case ISD::SUB:      return PerformSUBCombine(N, DCI);
   case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
   }
   

Modified: llvm/branches/Apple/Dib/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/X86/README.txt?rev=66914&r1=66913&r2=66914&view=diff

==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/X86/README.txt (original)
+++ llvm/branches/Apple/Dib/lib/Target/X86/README.txt Fri Mar 13 13:08:19 2009
@@ -1831,3 +1831,138 @@
 
 //===---------------------------------------------------------------------===//
 
+We currently compile this:
+
+define i32 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+normal:
+  ret i32 %sum
+overflow:
+  call void @llvm.trap()
+  unreachable
+}
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare void @llvm.trap()
+
+to:
+
+_func1:
+	movl	4(%esp), %eax
+	addl	8(%esp), %eax
+	jo	LBB1_2	## overflow
+LBB1_1:	## normal
+	ret
+LBB1_2:	## overflow
+	ud2
+
+it would be nice to produce "into" someday.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void vec_mpys1(int y[], const int x[], int scaler) {
+int i;
+for (i = 0; i < 150; i++)
+ y[i] += (((long long)scaler * (long long)x[i]) >> 31);
+}
+
+Compiles to this loop with GCC 3.x:
+
+.L5:
+	movl	%ebx, %eax
+	imull	(%edi,%ecx,4)
+	shrdl	$31, %edx, %eax
+	addl	%eax, (%esi,%ecx,4)
+	incl	%ecx
+	cmpl	$149, %ecx
+	jle	.L5
+
+llvm-gcc compiles it to the much uglier:
+
+LBB1_1:	## bb1
+	movl	24(%esp), %eax
+	movl	(%eax,%edi,4), %ebx
+	movl	%ebx, %ebp
+	imull	%esi, %ebp
+	movl	%ebx, %eax
+	mull	%ecx
+	addl	%ebp, %edx
+	sarl	$31, %ebx
+	imull	%ecx, %ebx
+	addl	%edx, %ebx
+	shldl	$1, %eax, %ebx
+	movl	20(%esp), %eax
+	addl	%ebx, (%eax,%edi,4)
+	incl	%edi
+	cmpl	$150, %edi
+	jne	LBB1_1	## bb1
+
+//===---------------------------------------------------------------------===//
+
+Test instructions can be eliminated by using EFLAGS values from arithmetic
+instructions. This is currently not done for mul, and, or, xor, neg, shl,
+sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
+for read-modify-write instructions. It is also current not done if the
+OF or CF flags are needed.
+
+The shift operators have the complication that when the shift count is
+zero, EFLAGS is not set, so they can only subsume a test instruction if
+the shift count is known to be non-zero. Also, using the EFLAGS value
+from a shift is apparently very slow on some x86 implementations.
+
+In read-modify-write instructions, the root node in the isel match is
+the store, and isel has no way for the use of the EFLAGS result of the
+arithmetic to be remapped to the new node.
+
+Add and subtract instructions set OF on signed overflow and CF on unsiged
+overflow, while test instructions always clear OF and CF. In order to
+replace a test with an add or subtract in a situation where OF or CF is
+needed, codegen must be able to prove that the operation cannot see
+signed or unsigned overflow, respectively.
+
+//===---------------------------------------------------------------------===//
+
+memcpy/memmove do not lower to SSE copies when possible.  A silly example is:
+define <16 x float> @foo(<16 x float> %A) nounwind {
+	%tmp = alloca <16 x float>, align 16
+	%tmp2 = alloca <16 x float>, align 16
+	store <16 x float> %A, <16 x float>* %tmp
+	%s = bitcast <16 x float>* %tmp to i8*
+	%s2 = bitcast <16 x float>* %tmp2 to i8*
+	call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16)
+	%R = load <16 x float>* %tmp2
+	ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+which compiles to:
+
+_foo:
+	subl	$140, %esp
+	movaps	%xmm3, 112(%esp)
+	movaps	%xmm2, 96(%esp)
+	movaps	%xmm1, 80(%esp)
+	movaps	%xmm0, 64(%esp)
+	movl	60(%esp), %eax
+	movl	%eax, 124(%esp)
+	movl	56(%esp), %eax
+	movl	%eax, 120(%esp)
+	movl	52(%esp), %eax
+        <many many more 32-bit copies>
+      	movaps	(%esp), %xmm0
+	movaps	16(%esp), %xmm1
+	movaps	32(%esp), %xmm2
+	movaps	48(%esp), %xmm3
+	addl	$140, %esp
+	ret
+
+On Nehalem, it may even be cheaper to just use movups when unaligned than to
+fall back to lower-granularity chunks.
+
+//===---------------------------------------------------------------------===//

Modified: llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp?rev=66914&r1=66913&r2=66914&view=diff

==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp Fri Mar 13 13:08:19 2009
@@ -5701,7 +5701,8 @@
         !isScalarFPTypeInSSEReg(VT))  // FPStack?
       IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
     
-    if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME
+    if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+        Opc == X86ISD::BT) { // FIXME
       Cond = Cmp;
       addTest = false;
     }
@@ -8178,9 +8179,212 @@
 
   }
 
+  // If this is a select between two integer constants, try to do some
+  // optimizations.
+  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+      // Don't do this for crazy integer types.
+      if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+        // If this is efficiently invertible, canonicalize the LHSC/RHSC values
+        // so that TrueC (the true value) is larger than FalseC.
+        bool NeedsCondInvert = false;
+        
+        if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+            // Efficiently invertible.
+            (Cond.getOpcode() == ISD::SETCC ||  // setcc -> invertible.
+             (Cond.getOpcode() == ISD::XOR &&   // xor(X, C) -> invertible.
+              isa<ConstantSDNode>(Cond.getOperand(1))))) {
+          NeedsCondInvert = true;
+          std::swap(TrueC, FalseC);
+        }
+   
+        // Optimize C ? 8 : 0 -> zext(C) << 3.  Likewise for any pow2/0.
+        if (FalseC->getAPIntValue() == 0 &&
+            TrueC->getAPIntValue().isPowerOf2()) {
+          if (NeedsCondInvert) // Invert the condition if needed.
+            Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                               DAG.getConstant(1, Cond.getValueType()));
+          
+          // Zero extend the condition if needed.
+          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+          
+          unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+          return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+                             DAG.getConstant(ShAmt, MVT::i8));
+        }
+        
+        // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
+        if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+          if (NeedsCondInvert) // Invert the condition if needed.
+            Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                               DAG.getConstant(1, Cond.getValueType()));
+          
+          // Zero extend the condition if needed.
+          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                             FalseC->getValueType(0), Cond);
+          return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+                             SDValue(FalseC, 0));
+        }
+        
+        // Optimize cases that will turn into an LEA instruction.  This requires
+        // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+        if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+          uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+          if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+          
+          bool isFastMultiplier = false;
+          if (Diff < 10) {
+            switch ((unsigned char)Diff) {
+              default: break;
+              case 1:  // result = add base, cond
+              case 2:  // result = lea base(    , cond*2)
+              case 3:  // result = lea base(cond, cond*2)
+              case 4:  // result = lea base(    , cond*4)
+              case 5:  // result = lea base(cond, cond*4)
+              case 8:  // result = lea base(    , cond*8)
+              case 9:  // result = lea base(cond, cond*8)
+                isFastMultiplier = true;
+                break;
+            }
+          }
+          
+          if (isFastMultiplier) {
+            APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+            if (NeedsCondInvert) // Invert the condition if needed.
+              Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                                 DAG.getConstant(1, Cond.getValueType()));
+            
+            // Zero extend the condition if needed.
+            Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+                               Cond);
+            // Scale the condition by the difference.
+            if (Diff != 1)
+              Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+                                 DAG.getConstant(Diff, Cond.getValueType()));
+            
+            // Add the base if non-zero.
+            if (FalseC->getAPIntValue() != 0)
+              Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+                                 SDValue(FalseC, 0));
+            return Cond;
+          }
+        }      
+      }
+  }
+      
   return SDValue();
 }
 
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI) {
+  DebugLoc DL = N->getDebugLoc();
+  
+  // If the flag operand isn't dead, don't touch this CMOV.
+  if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+    return SDValue();
+  
+  // If this is a select between two integer constants, try to do some
+  // optimizations.  Note that the operands are ordered the opposite of SELECT
+  // operands.
+  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+      // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+      // larger than FalseC (the false value).
+      X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+        
+      if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+        CC = X86::GetOppositeBranchCondition(CC);
+        std::swap(TrueC, FalseC);
+      }
+        
+      // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3.  Likewise for any pow2/0.
+      // This is efficient for any integer data type (including i8/i16) and
+      // shift amount.
+      if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+        SDValue Cond = N->getOperand(3);
+        Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+                           DAG.getConstant(CC, MVT::i8), Cond);
+      
+        // Zero extend the condition if needed.
+        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+        
+        unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+        Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+                           DAG.getConstant(ShAmt, MVT::i8));
+        if (N->getNumValues() == 2)  // Dead flag value?
+          return DCI.CombineTo(N, Cond, SDValue());
+        return Cond;
+      }
+      
+      // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.  This is efficient
+      // for any integer data type, including i8/i16.
+      if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+        SDValue Cond = N->getOperand(3);
+        Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+                           DAG.getConstant(CC, MVT::i8), Cond);
+        
+        // Zero extend the condition if needed.
+        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                           FalseC->getValueType(0), Cond);
+        Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+                           SDValue(FalseC, 0));
+        
+        if (N->getNumValues() == 2)  // Dead flag value?
+          return DCI.CombineTo(N, Cond, SDValue());
+        return Cond;
+      }
+      
+      // Optimize cases that will turn into an LEA instruction.  This requires
+      // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+      if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+        uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+        if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+       
+        bool isFastMultiplier = false;
+        if (Diff < 10) {
+          switch ((unsigned char)Diff) {
+          default: break;
+          case 1:  // result = add base, cond
+          case 2:  // result = lea base(    , cond*2)
+          case 3:  // result = lea base(cond, cond*2)
+          case 4:  // result = lea base(    , cond*4)
+          case 5:  // result = lea base(cond, cond*4)
+          case 8:  // result = lea base(    , cond*8)
+          case 9:  // result = lea base(cond, cond*8)
+            isFastMultiplier = true;
+            break;
+          }
+        }
+        
+        if (isFastMultiplier) {
+          APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+          SDValue Cond = N->getOperand(3);
+          Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+                             DAG.getConstant(CC, MVT::i8), Cond);
+          // Zero extend the condition if needed.
+          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+                             Cond);
+          // Scale the condition by the difference.
+          if (Diff != 1)
+            Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+                               DAG.getConstant(Diff, Cond.getValueType()));
+
+          // Add the base if non-zero.
+          if (FalseC->getAPIntValue() != 0)
+            Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+                               SDValue(FalseC, 0));
+          if (N->getNumValues() == 2)  // Dead flag value?
+            return DCI.CombineTo(N, Cond, SDValue());
+          return Cond;
+        }
+      }      
+    }
+  }
+  return SDValue();
+}
+
+
 /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
 ///                       when possible.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
@@ -8441,6 +8645,7 @@
   case ISD::BUILD_VECTOR:
     return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
+  case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);

Added: llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll?rev=66914&view=auto

==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll Fri Mar 13 13:08:19 2009
@@ -0,0 +1,22 @@
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+; RUN: llvm-as < %s | llc | grep {LCPI1_0(,%eax,4)}
+define float @f(i32 %x) nounwind readnone {
+entry:
+	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01		; <float> [#uses=1]
+	ret float %iftmp.0.0
+}
+
+; RUN: llvm-as < %s | llc | grep {movsbl.*(%e.x,%e.x,4), %eax}
+define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
+entry:
+	%0 = fcmp olt double %F, 4.200000e+01		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, i32 4, i32 0		; <i32> [#uses=1]
+	%1 = getelementptr i8* %P, i32 %iftmp.0.0		; <i8*> [#uses=1]
+	%2 = load i8* %1, align 1		; <i8> [#uses=1]
+	ret i8 %2
+}
+

Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll?rev=66914&r1=66913&r2=66914&view=diff

==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll Fri Mar 13 13:08:19 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep 24576
 ; PR2135
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -19,9 +19,6 @@
 	ret void
 }
 
-define i1 @report__equal(i32 %x, i32 %y) {
-	%tmp = icmp eq i32 %x, %y		
-	ret i1 %tmp
-}
+declare i1 @report__equal(i32 %x, i32 %y) nounwind;
 
 declare void @abort()

Added: llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll?rev=66914&view=auto

==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll Fri Mar 13 13:08:19 2009
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc | not grep cmov
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+; Should compile to setcc | -2.
+; rdar://6668608
+define i32 @test(i32* nocapture %P) nounwind readonly {
+entry:
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 -1, i32 -2		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+
+; 	setl	%al
+;	movzbl	%al, %eax
+;	leal	4(%eax,%eax,8), %eax
+define i32 @test2(i32* nocapture %P) nounwind readonly {
+entry:
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 4, i32 13		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+