[llvm] r345824 - [X86][SSE] Move 2-input limit up from getFauxShuffleMask to resolveTargetShuffleInputs (reapplied)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 1 04:52:09 PDT 2018
Author: rksimon
Date: Thu Nov 1 04:52:09 2018
New Revision: 345824
URL: http://llvm.org/viewvc/llvm-project?rev=345824&view=rev
Log:
[X86][SSE] Move 2-input limit up from getFauxShuffleMask to resolveTargetShuffleInputs (reapplied)
Reapplying an updated version of rL345395 (reverted in rL345451), now that the issues noticed in PR39483 have been fixed.
This patch allows resolveTargetShuffleInputs to remove UNDEF inputs from cases where we have more than 2 inputs.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=345824&r1=345823&r2=345824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Nov 1 04:52:09 2018
@@ -6325,9 +6325,6 @@ static bool getFauxShuffleMask(SDValue N
if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
!resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
return false;
- // TODO - Add support for more than 2 inputs.
- if ((SrcInputs0.size() + SrcInputs1.size()) > 2)
- return false;
int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
@@ -6386,8 +6383,7 @@ static bool getFauxShuffleMask(SDValue N
}
Mask[i + InsertIdx] = M;
}
- // TODO - Add support for more than 1 subinput.
- return Ops.size() <= 2;
+ return true;
}
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
@@ -6580,7 +6576,8 @@ static bool resolveTargetShuffleInputs(S
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
- return true;
+ // TODO - Add support for more than 2 inputs.
+ return Inputs.size() <= 2;
}
/// Returns the scalar element that will make up the ith
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=345824&r1=345823&r2=345824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Thu Nov 1 04:52:09 2018
@@ -4019,11 +4019,11 @@ define <4 x double> @test_masked_8xdoubl
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm3[1]
-; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
-; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,0,1,1]
-; CHECK-NEXT: vmovapd %ymm1, %ymm0
+; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,5,5]
+; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
+; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
+; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
@@ -4034,11 +4034,12 @@ define <4 x double> @test_masked_8xdoubl
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
-; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
-; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,1,1]
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [1,1,5,5]
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
+; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
More information about the llvm-commits mailing list