[llvm-commits] [llvm] r66914 - in /llvm/branches/Apple/Dib: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/X86/README.txt lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/const-select.ll test/CodeGen/X86/mul-legalize.ll test/CodeGen/X86/select-no-cmov.ll
Bill Wendling
isanbard at gmail.com
Fri Mar 13 11:08:20 PDT 2009
Author: void
Date: Fri Mar 13 13:08:19 2009
New Revision: 66914
URL: http://llvm.org/viewvc/llvm-project?rev=66914&view=rev
Log:
--- Merging (from foreign repository) r66779 into '.':
U test/CodeGen/X86/mul-legalize.ll
A test/CodeGen/X86/const-select.ll
U lib/CodeGen/SelectionDAG/DAGCombiner.cpp
U lib/Target/ARM/ARMISelLowering.cpp
U lib/Target/X86/X86ISelLowering.cpp
U lib/Target/X86/README.txt
Move 3 "(add (select cc, 0, c), x) -> (select cc, x, (add, x, c))"
related transformations out of target-specific dag combine into the
ARM backend. These were added by Evan in r37685 with no testcases
and only seems to help ARM (e.g. test/CodeGen/ARM/select_xform.ll).
Add some simple X86-specific (for now) DAG combines that turn things
like cond ? 8 : 0 -> (zext(cond) << 3). This happens frequently
with the recently added cp constant select optimization, but is a
very general xform. For example, we now compile the second example
in const-select.ll to:
_test:
movsd LCPI2_0, %xmm0
ucomisd 8(%esp), %xmm0
seta %al
movzbl %al, %eax
movl 4(%esp), %ecx
movsbl (%ecx,%eax,4), %eax
ret
instead of:
_test:
movl 4(%esp), %eax
leal 4(%eax), %ecx
movsd LCPI2_0, %xmm0
ucomisd 8(%esp), %xmm0
cmovbe %eax, %ecx
movsbl (%ecx), %eax
ret
This passes multisource and dejagnu.
--- Merging (from foreign repository) r66868 into '.':
A test/CodeGen/X86/select-no-cmov.ll
G lib/Target/X86/X86ISelLowering.cpp
optimize the case of cond ? 42 : 41 and friends. This compiles the
example to:
_test:
movl 4(%esp), %eax
cmpl $41, (%eax)
setg %al
movzbl %al, %eax
orl $4294967294, %eax
ret
instead of:
movl 4(%esp), %eax
cmpl $41, (%eax)
movl $4294967294, %ecx
movl $4294967295, %eax
cmova %ecx, %eax
ret
which is smaller in code size and faster. rdar://6668608
--- Merging (from foreign repository) r66869 into '.':
U test/CodeGen/X86/select-no-cmov.ll
G lib/Target/X86/X86ISelLowering.cpp
generalize the previous code to use the full generality of LEA
for i32/i64 expressions (we could also do i16 on cpus where
i16 lea is fast, but I didn't add this). On the example, we now
generate:
_test:
movl 4(%esp), %eax
cmpl $42, (%eax)
setl %al
movzbl %al, %eax
leal 4(%eax,%eax,8), %eax
ret
instead of:
_test:
movl 4(%esp), %eax
cmpl $41, (%eax)
movl $4, %ecx
movl $13, %eax
cmovg %ecx, %eax
ret
Added:
llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll
Modified:
llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp
llvm/branches/Apple/Dib/lib/Target/X86/README.txt
llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll
Modified: llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=66914&r1=66913&r2=66914&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Mar 13 13:08:19 2009
@@ -543,8 +543,10 @@
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
- AddToWorkList(To[i].getNode());
- AddUsersToWorkList(To[i].getNode());
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
}
}
@@ -944,65 +946,6 @@
return SDValue();
}
-static
-SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
- SelectionDAG &DAG, const TargetLowering &TLI,
- bool LegalOperations) {
- MVT VT = N->getValueType(0);
- unsigned Opc = N->getOpcode();
- bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
- SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
- SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
- ISD::CondCode CC = ISD::SETCC_INVALID;
-
- if (isSlctCC) {
- CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
- } else {
- SDValue CCOp = Slct.getOperand(0);
- if (CCOp.getOpcode() == ISD::SETCC)
- CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
- }
-
- bool DoXform = false;
- bool InvCC = false;
- assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
- "Bad input!");
-
- if (LHS.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(LHS)->isNullValue()) {
- DoXform = true;
- } else if (CC != ISD::SETCC_INVALID &&
- RHS.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(RHS)->isNullValue()) {
- std::swap(LHS, RHS);
- SDValue Op0 = Slct.getOperand(0);
- MVT OpVT = isSlctCC ? Op0.getValueType() :
- Op0.getOperand(0).getValueType();
- bool isInt = OpVT.isInteger();
- CC = ISD::getSetCCInverse(CC, isInt);
-
- if (LegalOperations && !TLI.isCondCodeLegal(CC, OpVT))
- return SDValue(); // Inverse operator isn't legal.
-
- DoXform = true;
- InvCC = true;
- }
-
- if (DoXform) {
- SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
- if (isSlctCC)
- return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
- Slct.getOperand(0), Slct.getOperand(1), CC);
- SDValue CCOp = Slct.getOperand(0);
- if (InvCC)
- CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
- CCOp.getOperand(0), CCOp.getOperand(1), CC);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
- CCOp, OtherOp, Result);
- }
- return SDValue();
-}
-
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1123,16 +1066,6 @@
if (Result.getNode()) return Result;
}
- // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
- if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N0, N1, DAG, TLI, LegalOperations);
- if (Result.getNode()) return Result;
- }
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DAG, TLI, LegalOperations);
- if (Result.getNode()) return Result;
- }
-
return SDValue();
}
@@ -1246,11 +1179,6 @@
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
- // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DAG, TLI, LegalOperations);
- if (Result.getNode()) return Result;
- }
// If either operand of a sub is undef, the result is undef
if (N0.getOpcode() == ISD::UNDEF)
Modified: llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp?rev=66914&r1=66913&r2=66914&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Target/ARM/ARMISelLowering.cpp Fri Mar 13 13:08:19 2009
@@ -256,7 +256,10 @@
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::FMRRD - No need to call setTargetDAGCombine
-
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+
+
setStackPointerRegisterToSaveRestore(ARM::SP);
setSchedulingPreference(SchedulingForRegPressure);
setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
@@ -1562,6 +1565,102 @@
// ARM Optimization Hooks
//===----------------------------------------------------------------------===//
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ MVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // added by evan in r37685 with no testcase.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // added by evan in r37685 with no testcase.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+
/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
static SDValue PerformFMRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
@@ -1576,6 +1675,8 @@
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
}
Modified: llvm/branches/Apple/Dib/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/X86/README.txt?rev=66914&r1=66913&r2=66914&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/X86/README.txt (original)
+++ llvm/branches/Apple/Dib/lib/Target/X86/README.txt Fri Mar 13 13:08:19 2009
@@ -1831,3 +1831,138 @@
//===---------------------------------------------------------------------===//
+We currently compile this:
+
+define i32 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %sum = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %normal
+normal:
+ ret i32 %sum
+overflow:
+ call void @llvm.trap()
+ unreachable
+}
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare void @llvm.trap()
+
+to:
+
+_func1:
+ movl 4(%esp), %eax
+ addl 8(%esp), %eax
+ jo LBB1_2 ## overflow
+LBB1_1: ## normal
+ ret
+LBB1_2: ## overflow
+ ud2
+
+it would be nice to produce "into" someday.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void vec_mpys1(int y[], const int x[], int scaler) {
+int i;
+for (i = 0; i < 150; i++)
+ y[i] += (((long long)scaler * (long long)x[i]) >> 31);
+}
+
+Compiles to this loop with GCC 3.x:
+
+.L5:
+ movl %ebx, %eax
+ imull (%edi,%ecx,4)
+ shrdl $31, %edx, %eax
+ addl %eax, (%esi,%ecx,4)
+ incl %ecx
+ cmpl $149, %ecx
+ jle .L5
+
+llvm-gcc compiles it to the much uglier:
+
+LBB1_1: ## bb1
+ movl 24(%esp), %eax
+ movl (%eax,%edi,4), %ebx
+ movl %ebx, %ebp
+ imull %esi, %ebp
+ movl %ebx, %eax
+ mull %ecx
+ addl %ebp, %edx
+ sarl $31, %ebx
+ imull %ecx, %ebx
+ addl %edx, %ebx
+ shldl $1, %eax, %ebx
+ movl 20(%esp), %eax
+ addl %ebx, (%eax,%edi,4)
+ incl %edi
+ cmpl $150, %edi
+ jne LBB1_1 ## bb1
+
+//===---------------------------------------------------------------------===//
+
+Test instructions can be eliminated by using EFLAGS values from arithmetic
+instructions. This is currently not done for mul, and, or, xor, neg, shl,
+sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
+for read-modify-write instructions. It is also current not done if the
+OF or CF flags are needed.
+
+The shift operators have the complication that when the shift count is
+zero, EFLAGS is not set, so they can only subsume a test instruction if
+the shift count is known to be non-zero. Also, using the EFLAGS value
+from a shift is apparently very slow on some x86 implementations.
+
+In read-modify-write instructions, the root node in the isel match is
+the store, and isel has no way for the use of the EFLAGS result of the
+arithmetic to be remapped to the new node.
+
+Add and subtract instructions set OF on signed overflow and CF on unsiged
+overflow, while test instructions always clear OF and CF. In order to
+replace a test with an add or subtract in a situation where OF or CF is
+needed, codegen must be able to prove that the operation cannot see
+signed or unsigned overflow, respectively.
+
+//===---------------------------------------------------------------------===//
+
+memcpy/memmove do not lower to SSE copies when possible. A silly example is:
+define <16 x float> @foo(<16 x float> %A) nounwind {
+ %tmp = alloca <16 x float>, align 16
+ %tmp2 = alloca <16 x float>, align 16
+ store <16 x float> %A, <16 x float>* %tmp
+ %s = bitcast <16 x float>* %tmp to i8*
+ %s2 = bitcast <16 x float>* %tmp2 to i8*
+ call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16)
+ %R = load <16 x float>* %tmp2
+ ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+which compiles to:
+
+_foo:
+ subl $140, %esp
+ movaps %xmm3, 112(%esp)
+ movaps %xmm2, 96(%esp)
+ movaps %xmm1, 80(%esp)
+ movaps %xmm0, 64(%esp)
+ movl 60(%esp), %eax
+ movl %eax, 124(%esp)
+ movl 56(%esp), %eax
+ movl %eax, 120(%esp)
+ movl 52(%esp), %eax
+ <many many more 32-bit copies>
+ movaps (%esp), %xmm0
+ movaps 16(%esp), %xmm1
+ movaps 32(%esp), %xmm2
+ movaps 48(%esp), %xmm3
+ addl $140, %esp
+ ret
+
+On Nehalem, it may even be cheaper to just use movups when unaligned than to
+fall back to lower-granularity chunks.
+
+//===---------------------------------------------------------------------===//
Modified: llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp?rev=66914&r1=66913&r2=66914&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Target/X86/X86ISelLowering.cpp Fri Mar 13 13:08:19 2009
@@ -5701,7 +5701,8 @@
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
- if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
Cond = Cmp;
addTest = false;
}
@@ -8178,9 +8179,212 @@
}
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+ // If this is efficiently invertible, canonicalize the LHSC/RHSC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
return SDValue();
}
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
@@ -8441,6 +8645,7 @@
case ISD::BUILD_VECTOR:
return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
Added: llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll?rev=66914&view=auto
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/const-select.ll Fri Mar 13 13:08:19 2009
@@ -0,0 +1,22 @@
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+; RUN: llvm-as < %s | llc | grep {LCPI1_0(,%eax,4)}
+define float @f(i32 %x) nounwind readnone {
+entry:
+ %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1]
+ ret float %iftmp.0.0
+}
+
+; RUN: llvm-as < %s | llc | grep {movsbl.*(%e.x,%e.x,4), %eax}
+define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
+entry:
+ %0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
+ %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
+ %2 = load i8* %1, align 1 ; <i8> [#uses=1]
+ ret i8 %2
+}
+
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll?rev=66914&r1=66913&r2=66914&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/mul-legalize.ll Fri Mar 13 13:08:19 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep 24576
; PR2135
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -19,9 +19,6 @@
ret void
}
-define i1 @report__equal(i32 %x, i32 %y) {
- %tmp = icmp eq i32 %x, %y
- ret i1 %tmp
-}
+declare i1 @report__equal(i32 %x, i32 %y) nounwind;
declare void @abort()
Added: llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll?rev=66914&view=auto
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/select-no-cmov.ll Fri Mar 13 13:08:19 2009
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc | not grep cmov
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+; Should compile to setcc | -2.
+; rdar://6668608
+define i32 @test(i32* nocapture %P) nounwind readonly {
+entry:
+ %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %1, i32 -1, i32 -2 ; <i32> [#uses=1]
+ ret i32 %iftmp.0.0
+}
+
+; setl %al
+; movzbl %al, %eax
+; leal 4(%eax,%eax,8), %eax
+define i32 @test2(i32* nocapture %P) nounwind readonly {
+entry:
+ %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %1, i32 4, i32 13 ; <i32> [#uses=1]
+ ret i32 %iftmp.0.0
+}
+
More information about the llvm-commits
mailing list