[llvm] r274908 - [X86][AVX2] Add support for target shuffle combining to VPERMPD/VPERMQ
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 8 12:23:29 PDT 2016
Author: rksimon
Date: Fri Jul 8 14:23:29 2016
New Revision: 274908
URL: http://llvm.org/viewvc/llvm-project?rev=274908&view=rev
Log:
[X86][AVX2] Add support for target shuffle combining to VPERMPD/VPERMQ
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=274908&r1=274907&r2=274908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jul 8 14:23:29 2016
@@ -24857,9 +24857,26 @@ static bool matchPermuteVectorShuffle(MV
if (SrcVT.is256BitVector() && !Subtarget.hasAVX2())
FloatDomain = true;
- // TODO - support LaneCrossing for AVX2 PERMQ/PERMPD
- if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask))
+ // Check for lane crossing permutes.
+ if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
+ // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
+ if (Subtarget.hasAVX2() && SrcVT.is256BitVector() && Mask.size() == 4) {
+ Shuffle = X86ISD::VPERMI;
+ ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
+ PermuteImm = getV4X86ShuffleImm(Mask);
+ return true;
+ }
+ if (Subtarget.hasAVX512() && SrcVT.is512BitVector() && Mask.size() == 8) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
+ Shuffle = X86ISD::VPERMI;
+ ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
+ PermuteImm = getV4X86ShuffleImm(RepeatedMask);
+ return true;
+ }
+ }
return false;
+ }
// VPERMILPD can permute with a non-repeating shuffle.
if (FloatDomain && MaskScalarSizeInBits == 64) {
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=274908&r1=274907&r2=274908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Fri Jul 8 14:23:29 2016
@@ -184,6 +184,24 @@ define <4 x double> @combine_permd_as_vp
ret <4 x double> %4
}
+define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
+; CHECK-LABEL: combine_permd_as_permq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
+; CHECK-NEXT: retq
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
+ ret <8 x i32> %1
+}
+
+define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
+; CHECK-LABEL: combine_permps_as_permpd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
+; CHECK-NEXT: retq
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
+ ret <8 x float> %1
+}
+
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=274908&r1=274907&r2=274908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Fri Jul 8 14:23:29 2016
@@ -399,6 +399,44 @@ define <8 x i64> @combine_permvar_as_vpb
ret <8 x i64> %1
}
+define <8 x i64> @combine_permvar_8i64_as_permq(<8 x i64> %x0, <8 x i64> %x1) {
+; CHECK-LABEL: combine_permvar_8i64_as_permq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT: retq
+ %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 -1)
+ ret <8 x i64> %1
+}
+define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
+; CHECK-LABEL: combine_permvar_8i64_as_permq_mask:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 %m)
+ ret <8 x i64> %1
+}
+
+define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
+; CHECK-LABEL: combine_permvar_8f64_as_permpd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT: retq
+ %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 -1)
+ ret <8 x double> %1
+}
+define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
+; CHECK-LABEL: combine_permvar_8f64_as_permpd_mask:
+; CHECK: # BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
+ ret <8 x double> %1
+}
+
define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK: # BB#0:
More information about the llvm-commits
mailing list