[llvm-commits] [llvm] r122098 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec-sign.ll
Nate Begeman
natebegeman at mac.com
Fri Dec 17 14:55:38 PST 2010
Author: sampo
Date: Fri Dec 17 16:55:37 2010
New Revision: 122098
URL: http://llvm.org/viewvc/llvm-project?rev=122098&view=rev
Log:
Add support for matching psign & plendvb to the x86 target
Remove unnecessary pandn patterns, 'vnot' patfrag looks through bitcasts
Added:
llvm/trunk/test/CodeGen/X86/vec-sign.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=122098&r1=122097&r2=122098&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 17 16:55:37 2010
@@ -981,6 +981,7 @@
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
@@ -8870,6 +8871,10 @@
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
@@ -11053,6 +11058,36 @@
return SDValue();
}
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // Want to form PANDN nodes, in the hopes of then easily combining them with
+ // OR and AND nodes to form PBLEND/PSIGN.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -11060,12 +11095,101 @@
return SDValue();
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
- // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::PANDN)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and PANDN and
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a plendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ unsigned IID = Intrinsic::x86_sse41_pblendvb;
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::v16i8,
+ DAG.getConstant(IID, MVT::i32), X, Y, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
@@ -11116,7 +11240,7 @@
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
-
+
return SDValue();
}
@@ -11334,6 +11458,7 @@
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=122098&r1=122097&r2=122098&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Dec 17 16:55:37 2010
@@ -159,7 +159,13 @@
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
-
+
+ /// PANDN - and with not'd value.
+ PANDN,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=122098&r1=122097&r2=122098&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Fri Dec 17 16:55:37 2010
@@ -43,6 +43,18 @@
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+def X86pandn : SDNode<"X86ISD::PANDN",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=122098&r1=122097&r2=122098&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Dec 17 16:55:37 2010
@@ -1584,19 +1584,13 @@
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
// single r+r
- [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
// double r+r
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))],
+ [],
// single r+m
- [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
// double r+m
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]]>;
+ []]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -2536,15 +2530,11 @@
}
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
def PANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
} // Constraints = "$src1 = $dst"
@@ -3608,6 +3598,13 @@
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
@@ -3896,27 +3893,6 @@
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-// Some special case pandn patterns.
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
// vector -> vector casts
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
Added: llvm/trunk/test/CodeGen/X86/vec-sign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec-sign.ll?rev=122098&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec-sign.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec-sign.ll Fri Dec 17 16:55:37 2010
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=nehalem | FileCheck %s
+
+define <4 x i32> @psignd(<4 x i32> %a, <4 x i32> %b) nounwind ssp {
+entry:
+; CHECK: psignd
+; CHECK-NOT: sub
+ %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <4 x i32> %a, %0
+ %2 = and <4 x i32> %b.lobit, %sub
+ %cond = or <4 x i32> %1, %2
+ ret <4 x i32> %cond
+}
+
+define <4 x i32> @pblendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind ssp {
+entry:
+; CHECK: pblendvb
+ %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <4 x i32> %c, %0
+ %2 = and <4 x i32> %a, %b.lobit
+ %cond = or <4 x i32> %1, %2
+ ret <4 x i32> %cond
+}
More information about the llvm-commits
mailing list