[llvm-commits] [llvm] r135088 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-256-logic.ll
Bruno Cardoso Lopes
bruno.cardoso at gmail.com
Wed Jul 13 14:36:51 PDT 2011
Author: bruno
Date: Wed Jul 13 16:36:51 2011
New Revision: 135088
URL: http://llvm.org/viewvc/llvm-project?rev=135088&view=rev
Log:
Make X86ISD::ANDNP more general and Codegen 256-bit VANDNP. A more
general version of X86ISD::ANDNP also opened the room for a little bit
of refactoring.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx-256-logic.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=135088&r1=135087&r2=135088&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 13 16:36:51 2011
@@ -11821,10 +11821,12 @@
if (R.getNode())
return R;
- // Want to form ANDNP nodes, in the hopes of then easily combining them with
- // OR and AND nodes to form PBLEND/PSIGN.
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64)
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=135088&r1=135087&r2=135088&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Jul 13 16:36:51 2011
@@ -47,7 +47,7 @@
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
- SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=135088&r1=135087&r2=135088&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jul 13 16:36:51 2011
@@ -1473,98 +1473,68 @@
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasPat = 0,
- list<list<dag>> Pattern = []> {
+ SDNode OpNode> {
let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- VEX_4V;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- OpSize, VEX_4V;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>, TB;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>,
- TB, OpSize;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
}
}
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasNoPat = 0> {
+ SDNode OpNode> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem,
- !if(HasNoPat, []<dag>, // rr
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1,
- VR256:$src2)))]),
- !if(HasNoPat, []<dag>, // rm
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))]), 0>, VEX_4V;
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, VEX_4V;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
- !if(HasNoPat, []<dag>, // rr
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))]),
- !if(HasNoPat, []<dag>, // rm
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))]), 0>,
- OpSize, VEX_4V;
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
-let isCommutable = 0 in {
- defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", undef /* dummy */, 1>;
-}
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
- // single r+r
- [(set VR128:$dst, (X86andnp VR128:$src1, VR128:$src2))],
- // double r+r
- [],
- // single r+m
- [(set VR128:$dst, (X86andnp VR128:$src1, (memopv2i64 addr:$src2)))],
- // double r+m
- []]>;
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -3678,6 +3648,7 @@
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
}
// Move scalar to XMM zero-extended
Modified: llvm/trunk/test/CodeGen/X86/avx-256-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-256-logic.ll?rev=135088&r1=135087&r2=135088&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-256-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-256-logic.ll Wed Jul 13 16:36:51 2011
@@ -114,3 +114,48 @@
ret <8 x float> %1
}
+; CHECK: vandnpd
+define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %x to <4 x i64>
+ %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %1 = bitcast <4 x double> %y to <4 x i64>
+ %and.i = and <4 x i64> %1, %neg.i
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandnpd (%
+define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <4 x double>* %x, align 32
+ %0 = bitcast <4 x double> %y to <4 x i64>
+ %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %1 = bitcast <4 x double> %tmp2 to <4 x i64>
+ %and.i = and <4 x i64> %1, %neg.i
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandnps
+define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %x to <8 x i32>
+ %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = bitcast <8 x float> %y to <8 x i32>
+ %and.i = and <8 x i32> %1, %neg.i
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
+
+; CHECK: vandnps (%
+define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <8 x float>* %x, align 32
+ %0 = bitcast <8 x float> %y to <8 x i32>
+ %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %and.i = and <8 x i32> %1, %neg.i
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
More information about the llvm-commits
mailing list