[llvm-commits] [llvm] r96389 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/lsr-reuse-trunc.ll test/CodeGen/X86/sink-hoist.ll
Evan Cheng
evan.cheng at apple.com
Tue Feb 16 13:09:53 PST 2010
Author: evancheng
Date: Tue Feb 16 15:09:44 2010
New Revision: 96389
URL: http://llvm.org/viewvc/llvm-project?rev=96389&view=rev
Log:
Look for SSE 'and' instructions of this form: (and x, (build_vector c1,c2,c3,c4)).
If there exists a use of a build_vector that's the bitwise complement of the mask,
then transform the node to
(and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~c1,~c2,~c3,~c4)).
Since this transformation is only useful when 1) the given build_vector will
become a load from the constant pool, and 2) (and (xor x, -1), y) matches a single
instruction, I decided this is appropriate as an x86-specific transformation.
rdar://7323335
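
For readers wondering why (and (xor x, -1), y) is a single instruction: SSE's
ANDNPS/PANDN compute (~src1 & src2) in one operation, which is also what lets a
single complemented constant serve both the plain and the inverted mask. Below is
a minimal scalar sketch of that idea in plain C++; the helper name andn is made up
for illustration and this is not the DAG combine itself.

#include <cassert>
#include <cstdint>

// Scalar model of one 32-bit lane of PANDN/ANDNPS: result = ~a & b.
static uint32_t andn(uint32_t a, uint32_t b) { return ~a & b; }

int main() {
  // Only the sign-bit mask needs a constant pool entry; its complement is
  // reached through the andn form instead of a second constant.
  const uint32_t SignMask = 0x80000000u;
  uint32_t x = 0x3f8f5c29u;
  uint32_t y = 0xbf000000u;

  // x & 0x7fffffff (clear the sign bit) via the complemented mask:
  assert((x & 0x7fffffffu) == andn(SignMask, x));
  // y & 0x80000000 (keep only the sign bit) uses the same constant directly:
  assert((y & SignMask) == 0x80000000u);
  return 0;
}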
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll
llvm/trunk/test/CodeGen/X86/sink-hoist.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=96389&r1=96388&r2=96389&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 16 15:09:44 2010
@@ -990,6 +990,7 @@
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::SELECT);
+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
@@ -9157,6 +9158,53 @@
   return SDValue();
 }

+/// PerformANDCombine - Look for SSE and instructions of this form:
+/// (and x, (build_vector c1,c2,c3,c4)). If there exists a use of a build_vector
+/// that's the bitwise complement of the mask, then transform the node to
+/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~c1,~c2,~c3,~c4)).
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || !VT.isInteger())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse())
+    return SDValue();
+
+  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+    unsigned NumElts = VT.getVectorNumElements();
+    EVT EltVT = VT.getVectorElementType();
+    SmallVector<SDValue, 8> Mask;
+    Mask.reserve(NumElts);
+    for (unsigned i = 0; i != NumElts; ++i) {
+      SDValue Arg = N1.getOperand(i);
+      if (Arg.getOpcode() == ISD::UNDEF) {
+        Mask.push_back(Arg);
+        continue;
+      }
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg);
+      if (!C) return SDValue();
+      Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT));
+    }
+    N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT,
+                     &Mask[0], NumElts);
+    if (!N1.use_empty()) {
+      unsigned Bits = EltVT.getSizeInBits();
+      Mask.clear();
+      for (unsigned i = 0; i != NumElts; ++i)
+        Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT));
+      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                    VT, &Mask[0], NumElts);
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+                                     N0, NewMask), N1);
+    }
+  }
+
+  return SDValue();
+}
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
@@ -9305,7 +9353,7 @@
}
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
@@ -9690,6 +9738,7 @@
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
+  case ISD::AND:            return PerformANDCombine(N, DAG, DCI);
   case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
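
As a side note on the bit tricks inside PerformANDCombine: the complemented mask is
built lane by lane with ~C->getAPIntValue(), and the (build_vector -1,-1,-1,-1)
operand works because xor with the all-ones value is exactly the bitwise
complement. A standalone sketch of those two facts using llvm::APInt follows; it
assumes an LLVM tree to compile against and is not part of the patch.

#include "llvm/ADT/APInt.h"
#include <cassert>

using llvm::APInt;

int main() {
  // One 32-bit lane: the combine records ~c for each constant element.
  APInt C(32, 0x7fffffffULL);
  APInt NotC = ~C;                      // 0x80000000
  assert(NotC == APInt(32, 0x80000000ULL));

  // The all-ones lane value used for the xor mask: x ^ all-ones == ~x.
  APInt AllOnes = APInt::getAllOnesValue(32);
  assert((C ^ AllOnes) == NotC);
  return 0;
}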
Modified: llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll?rev=96389&r1=96388&r2=96389&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll Tue Feb 16 15:09:44 2010
@@ -1,10 +1,19 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.
-; CHECK: movaps (%rsi,%rax,4), %xmm3
-; CHECK: movaps %xmm3, (%rdi,%rax,4)
+; Also check that andps and andnps share the same constant pool entry. Previously
+; llvm would codegen two andps, one using 0x80000000, the other 0x7fffffff.
+; rdar://7323335
+
+; CHECK: movaps LCPI1_0
+; CHECK: movaps LCPI1_1
+; CHECK-NOT: movaps LCPI1_2
+; CHECK: movaps (%rsi,%rax,4), %xmm2
+; CHECK: andps
+; CHECK: andnps
+; CHECK: movaps %xmm2, (%rdi,%rax,4)
; CHECK: addq $4, %rax
; CHECK: cmpl %eax, (%rdx)
; CHECK-NEXT: jg
Modified: llvm/trunk/test/CodeGen/X86/sink-hoist.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sink-hoist.ll?rev=96389&r1=96388&r2=96389&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sink-hoist.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sink-hoist.ll Tue Feb 16 15:09:44 2010
@@ -63,7 +63,6 @@
; CHECK: vv:
; CHECK: LCPI4_0(%rip), %xmm0
; CHECK: LCPI4_1(%rip), %xmm1
-; CHECK: LCPI4_2(%rip), %xmm2
; CHECK: align
; CHECK-NOT: LCPI
; CHECK: ret