[llvm-commits] [llvm] r66869 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/select-no-cmov.ll

Thu Mar 12 22:53:32 PDT 2009

Author: lattner
Date: Fri Mar 13 00:53:31 2009
New Revision: 66869

URL: http://llvm.org/viewvc/llvm-project?rev=66869&view=rev
Log:
generalize the previous code to use the full generality of LEA
for i32/i64 expressions (we could also do i16 on cpus where
i16 lea is fast, but I didn't add this).  On the example, we now
generate:

_test:
	movl	4(%esp), %eax
	cmpl	$42, (%eax)
	setl	%al
	movzbl	%al, %eax
	leal	4(%eax,%eax,8), %eax
	ret

instead of:

_test:
	movl	4(%esp), %eax
	cmpl	$41, (%eax)
	movl	$4, %ecx
	movl	$13, %eax
	cmovg	%ecx, %eax
	ret


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/select-no-cmov.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=66869&r1=66868&r2=66869&view=diff

==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Mar 13 00:53:31 2009
@@ -8188,25 +8188,26 @@
   
   // If this is a select between two integer constants, try to do some
   // optimizations.
-  if (ConstantSDNode *LHSC = dyn_cast<ConstantSDNode>(LHS)) {
-    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS))
+  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
       // Don't do this for crazy integer types.
       if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
         // If this is efficiently invertible, canonicalize the LHSC/RHSC values
-        // so that LHSC (the true value) is larger than RHSC (the false value).
+        // so that TrueC (the true value) is larger than FalseC.
         bool NeedsCondInvert = false;
         
-        if (LHSC->getAPIntValue().ult(RHSC->getAPIntValue()) &&
+        if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
             // Efficiently invertible.
             (Cond.getOpcode() == ISD::SETCC ||  // setcc -> invertible.
              (Cond.getOpcode() == ISD::XOR &&   // xor(X, C) -> invertible.
               isa<ConstantSDNode>(Cond.getOperand(1))))) {
           NeedsCondInvert = true;
-          std::swap(LHSC, RHSC);
+          std::swap(TrueC, FalseC);
         }
    
         // Optimize C ? 8 : 0 -> zext(C) << 3.  Likewise for any pow2/0.
-        if (RHSC->getAPIntValue() == 0 && LHSC->getAPIntValue().isPowerOf2()) {
+        if (FalseC->getAPIntValue() == 0 &&
+            TrueC->getAPIntValue().isPowerOf2()) {
           if (NeedsCondInvert) // Invert the condition if needed.
             Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
                                DAG.getConstant(1, Cond.getValueType()));
@@ -8214,22 +8215,67 @@
           // Zero extend the condition if needed.
           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
           
-          unsigned ShAmt = LHSC->getAPIntValue().logBase2();
+          unsigned ShAmt = TrueC->getAPIntValue().logBase2();
           return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
                              DAG.getConstant(ShAmt, MVT::i8));
         }
         
         // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
-        if (RHSC->getAPIntValue()+1 == LHSC->getAPIntValue()) {
+        if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
           if (NeedsCondInvert) // Invert the condition if needed.
             Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
                                DAG.getConstant(1, Cond.getValueType()));
           
           // Zero extend the condition if needed.
-          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, RHSC->getValueType(0), Cond);
+          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                             FalseC->getValueType(0), Cond);
           return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
-                             SDValue(RHSC, 0));
+                             SDValue(FalseC, 0));
         }
+        
+        // Optimize cases that will turn into an LEA instruction.  This requires
+        // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+        if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+          uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+          if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+          
+          bool isFastMultiplier = false;
+          if (Diff < 10) {
+            switch ((unsigned char)Diff) {
+              default: break;
+              case 1:  // result = add base, cond
+              case 2:  // result = lea base(    , cond*2)
+              case 3:  // result = lea base(cond, cond*2)
+              case 4:  // result = lea base(    , cond*4)
+              case 5:  // result = lea base(cond, cond*4)
+              case 8:  // result = lea base(    , cond*8)
+              case 9:  // result = lea base(cond, cond*8)
+                isFastMultiplier = true;
+                break;
+            }
+          }
+          
+          if (isFastMultiplier) {
+            APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+            if (NeedsCondInvert) // Invert the condition if needed.
+              Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                                 DAG.getConstant(1, Cond.getValueType()));
+            
+            // Zero extend the condition if needed.
+            Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+                               Cond);
+            // Scale the condition by the difference.
+            if (Diff != 1)
+              Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+                                 DAG.getConstant(Diff, Cond.getValueType()));
+            
+            // Add the base if non-zero.
+            if (FalseC->getAPIntValue() != 0)
+              Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+                                 SDValue(FalseC, 0));
+            return Cond;
+          }
+        }      
       }
   }
       
@@ -8260,6 +8306,8 @@
       }
         
       // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3.  Likewise for any pow2/0.
+      // This is efficient for any integer data type (including i8/i16) and
+      // shift amount.
       if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
         SDValue Cond = N->getOperand(3);
         Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
@@ -8275,21 +8323,69 @@
           return DCI.CombineTo(N, Cond, SDValue());
         return Cond;
       }
-
-      // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
+      
+      // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.  This is efficient
+      // for any integer data type, including i8/i16.
       if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
         SDValue Cond = N->getOperand(3);
         Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
                            DAG.getConstant(CC, MVT::i8), Cond);
         
         // Zero extend the condition if needed.
-        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
+        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                           FalseC->getValueType(0), Cond);
         Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                            SDValue(FalseC, 0));
+        
         if (N->getNumValues() == 2)  // Dead flag value?
           return DCI.CombineTo(N, Cond, SDValue());
         return Cond;
       }
+      
+      // Optimize cases that will turn into an LEA instruction.  This requires
+      // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+      if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+        uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+        if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+       
+        bool isFastMultiplier = false;
+        if (Diff < 10) {
+          switch ((unsigned char)Diff) {
+          default: break;
+          case 1:  // result = add base, cond
+          case 2:  // result = lea base(    , cond*2)
+          case 3:  // result = lea base(cond, cond*2)
+          case 4:  // result = lea base(    , cond*4)
+          case 5:  // result = lea base(cond, cond*4)
+          case 8:  // result = lea base(    , cond*8)
+          case 9:  // result = lea base(cond, cond*8)
+            isFastMultiplier = true;
+            break;
+          }
+        }
+        
+        if (isFastMultiplier) {
+          APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+          SDValue Cond = N->getOperand(3);
+          Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+                             DAG.getConstant(CC, MVT::i8), Cond);
+          // Zero extend the condition if needed.
+          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+                             Cond);
+          // Scale the condition by the difference.
+          if (Diff != 1)
+            Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+                               DAG.getConstant(Diff, Cond.getValueType()));
+
+          // Add the base if non-zero.
+          if (FalseC->getAPIntValue() != 0)
+            Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+                               SDValue(FalseC, 0));
+          if (N->getNumValues() == 2)  // Dead flag value?
+            return DCI.CombineTo(N, Cond, SDValue());
+          return Cond;
+        }
+      }      
     }
   }
   return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/select-no-cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-no-cmov.ll?rev=66869&r1=66868&r2=66869&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-no-cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-no-cmov.ll Fri Mar 13 00:53:31 2009
@@ -12,3 +12,15 @@
 	%iftmp.0.0 = select i1 %1, i32 -1, i32 -2		; <i32> [#uses=1]
 	ret i32 %iftmp.0.0
 }
+
+; 	setl	%al
+;	movzbl	%al, %eax
+;	leal	4(%eax,%eax,8), %eax
+define i32 @test2(i32* nocapture %P) nounwind readonly {
+entry:
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 4, i32 13		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+