[llvm-commits] [llvm] r149968 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-shuffle.ll test/CodeGen/X86/avx-splat.ll test/CodeGen/X86/avx-vpermil.ll
Craig Topper
craig.topper at gmail.com
Mon Feb 6 22:28:43 PST 2012
Author: ctopper
Date: Tue Feb 7 00:28:42 2012
New Revision: 149968
URL: http://llvm.org/viewvc/llvm-project?rev=149968&view=rev
Log:
Add instruction selection for 256-bit VPSHUFD and 128-bit VPERMILPS/VPERMILPD.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
llvm/trunk/test/CodeGen/X86/avx-splat.ll
llvm/trunk/test/CodeGen/X86/avx-vpermil.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=149968&r1=149967&r2=149968&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 7 00:28:42 2012
@@ -3713,7 +3713,7 @@
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
+/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
if (!HasAVX)
return false;
@@ -6467,6 +6467,9 @@
unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
+
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
@@ -6636,9 +6639,13 @@
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasAVX))
+ if (isVPERMILPMask(M, VT, HasAVX)) {
+ if (HasAVX2 && VT == MVT::v8i32)
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
X86::getShuffleSHUFImmediate(SVOp), DAG);
+ }
// Handle VPERM2F128/VPERM2I128 permutations
if (isVPERM2X128Mask(M, VT, HasAVX))
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=149968&r1=149967&r2=149968&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Feb 7 00:28:42 2012
@@ -3993,21 +3993,19 @@
(undef))))]>;
}
-multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
- PatFrag bc_frag> {
+multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
def Yri : Ii8<0x70, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1,
- (undef))))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
def Ymi : Ii8<0x70, MRMSrcMem,
(outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (pshuf_frag:$src2
- (bc_frag (memopv4i64 addr:$src1)),
- (undef))))]>;
+ [(set VR256:$dst,
+ (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))]>;
}
} // ExeDomain = SSEPackedInt
@@ -4053,17 +4051,9 @@
}
let Predicates = [HasAVX2] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB,
- OpSize, VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS,
- VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD,
- VEX;
+ defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
+ defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
+ defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
}
let Predicates = [HasSSE2] in {
@@ -4226,9 +4216,9 @@
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
- def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
(VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+ def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
//===---------------------------------------------------------------------===//
@@ -7200,6 +7190,19 @@
(VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
+
+def : Pat<(v4f32 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+ (VPERMILPSri VR128:$src1, imm:$imm)>;
+def : Pat<(v2f64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+ (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+ (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86VPermilp (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPSmi addr:$src1, imm:$imm)>;
+def : Pat<(v2f64 (X86VPermilp (memopv2f64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDmi addr:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDmi addr:$src1, imm:$imm)>;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=149968&r1=149967&r2=149968&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Tue Feb 7 00:28:42 2012
@@ -6,7 +6,7 @@
ret <4 x float> %b
; CHECK: test1:
; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
}
; rdar://10538417
@@ -98,22 +98,40 @@
}
define <4 x float> @test11(<4 x float> %a) nounwind {
-; CHECK: pshufd $27
+; CHECK: test11
+; CHECK: vpermilps $27
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
define <4 x float> @test12(<4 x float>* %a) nounwind {
-; CHECK: pshufd $27, (
+; CHECK: test12
+; CHECK: vpermilps $27, (
%tmp0 = load <4 x float>* %a
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
-;CHECK: test13
-;CHECK: shufd
-;CHECK: ret
-define <4 x i32> @test13(<2 x i32>%x) nounwind readnone {
+define <4 x i32> @test13(<4 x i32> %a) nounwind {
+; CHECK: test13
+; CHECK: vpshufd $27
+ %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+ %tmp0 = load <4 x i32>* %a
+ %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
%x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x i32>%x1
}
+
Modified: llvm/trunk/test/CodeGen/X86/avx-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-splat.ll?rev=149968&r1=149967&r2=149968&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-splat.ll Tue Feb 7 00:28:42 2012
@@ -32,7 +32,7 @@
ret <4 x i64> %vecinit6.i
}
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
Modified: llvm/trunk/test/CodeGen/X86/avx-vpermil.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vpermil.ll?rev=149968&r1=149967&r2=149968&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vpermil.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vpermil.ll Tue Feb 7 00:28:42 2012
@@ -45,7 +45,7 @@
ret <8 x float> %shuffle
}
-; CHECK-NOT: vpermilps
+; CHECK: vpermilps
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
More information about the llvm-commits mailing list