[llvm] r361270 - [X86][SSE] ComputeNumSignBitsForTargetNode - add X86ISD::ANDNP support
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 21 08:20:25 PDT 2019
Author: rksimon
Date: Tue May 21 08:20:24 2019
New Revision: 361270
URL: http://llvm.org/viewvc/llvm-project?rev=361270&view=rev
Log:
[X86][SSE] ComputeNumSignBitsForTargetNode - add X86ISD::ANDNP support
Fixes PACKSS-PSHUFB shuffle regressions mentioned on D61692
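
The new case applies the same rule used for a plain AND: ANDNP(X, Y) computes (~X) & Y per lane, inverting X preserves its redundant sign bits, and AND keeps the top bits uniform, so the result has at least min(ComputeNumSignBits(X), ComputeNumSignBits(Y)) sign bits. A minimal scalar sketch of that bound, assuming 16-bit lanes (the helper below is illustrative, not LLVM's API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Count how many of the leading bits of a 16-bit lane are copies of its sign
// bit (the sign bit itself counts, so the result is always >= 1). This models
// what SelectionDAG::ComputeNumSignBits reports for one lane; the name is
// illustrative, not LLVM's API.
static unsigned numSignBits16(uint16_t V) {
  unsigned Sign = (V >> 15) & 1u;
  unsigned N = 1;
  while (N < 16 && ((V >> (15 - N)) & 1u) == Sign)
    ++N;
  return N;
}

int main() {
  // ANDNP computes (~X) & Y per lane. If the top K bits of X all equal X's
  // sign bit, the same holds for ~X, and AND keeps those top K bits uniform,
  // so the result has at least min(numSignBits16(X), numSignBits16(Y)) sign
  // bits, which is exactly the std::min(Tmp0, Tmp1) returned in the patch.
  const uint16_t Xs[] = {0x0000, 0xFFFF, 0x0040, 0xFF80, 0x007B, 0xF000};
  const uint16_t Ys[] = {0x0000, 0xFFFF, 0x1234, 0x8001, 0xFFFE, 0x0100};
  for (uint16_t X : Xs)
    for (uint16_t Y : Ys) {
      unsigned Bound = std::min(numSignBits16(X), numSignBits16(Y));
      uint16_t AndN = (uint16_t)(~X & Y);
      assert(numSignBits16(AndN) >= Bound && "ANDNP sign-bit bound violated");
    }
  return 0;
}
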
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_saddo.ll
llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=361270&r1=361269&r2=361270&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue May 21 08:20:24 2019
@@ -31026,6 +31026,15 @@ unsigned X86TargetLowering::ComputeNumSi
// Vector compares return zero/all-bits result values.
return VTBits;
+ case X86ISD::ANDNP: {
+ unsigned Tmp0 =
+ DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp0 == 1) return 1; // Early out.
+ unsigned Tmp1 =
+ DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp0, Tmp1);
+ }
+
case X86ISD::CMOV: {
unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp0 == 1) return 1; // Early out.
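
The test updates below follow from the extra sign-bit information: the values being packed are all-zeros/all-ones compare masks combined through ANDNP, so every lane is already sign-extended and a signed-saturating pack (PACKSSDW/PACKSSWB) truncates it losslessly, which lets the PSHUFB byte shuffle and PUNPCKLQDQ pair collapse into a single PACKSSWB. A scalar sketch of one PACKSSWB lane under that assumption (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

// Scalar model of one PACKSSWB lane: narrow i16 to i8 with signed saturation,
// which is what the instruction does per element.
static int8_t packSswbLane(int16_t V) {
  if (V > INT8_MAX)
    return INT8_MAX;
  if (V < INT8_MIN)
    return INT8_MIN;
  return (int8_t)V;
}

int main() {
  // A lane with at least 9 known sign bits already fits in i8, so the
  // saturation never fires; in particular the all-zeros / all-ones compare
  // masks produced in the tests below pass through PACKSSWB unchanged.
  assert(packSswbLane(0) == 0);
  assert(packSswbLane(-1) == -1);
  assert(packSswbLane(INT16_MAX) == INT8_MAX); // a wide lane would saturate
  return 0;
}
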
Modified: llvm/trunk/test/CodeGen/X86/vec_saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_saddo.ll?rev=361270&r1=361269&r2=361270&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_saddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_saddo.ll Tue May 21 08:20:24 2019
@@ -823,26 +823,24 @@ define <16 x i32> @saddo_v16i32(<16 x i3
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm7
-; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm11
-; AVX1-NEXT: vpcmpgtd %xmm11, %xmm5, %xmm3
+; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm10
+; AVX1-NEXT: vpcmpgtd %xmm10, %xmm5, %xmm3
; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpandn %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpackssdw %xmm8, %xmm3, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm8, %xmm3, %xmm10
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT: vpackssdw %xmm8, %xmm3, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm7
+; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vpaddd %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm7
-; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT: vpcmpeqd %xmm7, %xmm1, %xmm1
-; AVX1-NEXT: vpandn %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm1, %xmm7
+; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm4
+; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpandn %xmm7, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm5, %xmm7
@@ -854,10 +852,9 @@ define <16 x i32> @saddo_v16i32(<16 x i3
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm7, %xmm2
; AVX1-NEXT: vpandn %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm10[0]
+; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm2
-; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm11, %ymm3
+; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm10, %ymm3
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
@@ -887,22 +884,19 @@ define <16 x i32> @saddo_v16i32(<16 x i3
; AVX2-NEXT: vpandn %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; AVX2-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtd %ymm2, %ymm4, %ymm7
+; AVX2-NEXT: vpcmpgtd %ymm2, %ymm4, %ymm5
+; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpcmpgtd %ymm0, %ymm4, %ymm7
; AVX2-NEXT: vpxor %ymm6, %ymm7, %ymm7
-; AVX2-NEXT: vpcmpgtd %ymm0, %ymm4, %ymm8
-; AVX2-NEXT: vpxor %ymm6, %ymm8, %ymm8
-; AVX2-NEXT: vpcmpeqd %ymm7, %ymm8, %ymm7
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm7, %ymm5
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm4, %ymm0
; AVX2-NEXT: vpxor %ymm6, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm0, %ymm8, %ymm0
-; AVX2-NEXT: vpandn %ymm7, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm0, %ymm7, %ymm0
+; AVX2-NEXT: vpandn %ymm5, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb %xmm5, %xmm0, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
Modified: llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ssubo.ll?rev=361270&r1=361269&r2=361270&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ssubo.ll Tue May 21 08:20:24 2019
@@ -856,22 +856,20 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpackssdw %xmm6, %xmm3, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm8, %xmm3, %xmm11
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vpcmpgtd %xmm6, %xmm9, %xmm7
-; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm3
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm7
-; AVX1-NEXT: vpsubd %xmm6, %xmm4, %xmm4
+; AVX1-NEXT: vpackssdw %xmm6, %xmm3, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm6
; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm3
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpandn %xmm3, %xmm7, %xmm3
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm3, %xmm6
+; AVX1-NEXT: vpsubd %xmm4, %xmm7, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpandn %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm9, %xmm6
; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm9, %xmm7
@@ -884,8 +882,7 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpandn %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm11[0]
+; AVX1-NEXT: vpacksswb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm10, %ymm1, %ymm4
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm0
@@ -918,23 +915,20 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
; AVX2-NEXT: vpandn %ymm1, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; AVX2-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtd %ymm2, %ymm4, %ymm7
+; AVX2-NEXT: vpcmpgtd %ymm2, %ymm4, %ymm5
+; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpcmpgtd %ymm0, %ymm4, %ymm7
; AVX2-NEXT: vpxor %ymm6, %ymm7, %ymm7
-; AVX2-NEXT: vpcmpgtd %ymm0, %ymm4, %ymm8
-; AVX2-NEXT: vpxor %ymm6, %ymm8, %ymm8
-; AVX2-NEXT: vpcmpeqd %ymm7, %ymm8, %ymm7
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm7, %ymm5
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm4, %ymm0
; AVX2-NEXT: vpxor %ymm6, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm0, %ymm8, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm0, %ymm7, %ymm0
; AVX2-NEXT: vpxor %ymm6, %ymm0, %ymm0
-; AVX2-NEXT: vpandn %ymm0, %ymm7, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm5, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb %xmm5, %xmm0, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1