[llvm] r361270 - [X86][SSE] ComputeNumSignBitsForTargetNode - add X86ISD::ANDNP support

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 21 08:20:25 PDT 2019


Author: rksimon
Date: Tue May 21 08:20:24 2019
New Revision: 361270

URL: http://llvm.org/viewvc/llvm-project?rev=361270&view=rev
Log:
[X86][SSE] ComputeNumSignBitsForTargetNode - add X86ISD::ANDNP support

Fixes PACKSS-PSHUFB shuffle regressions mentioned on D61692
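
Editor's note (not part of the original patch): X86ISD::ANDNP computes (~LHS) & RHS.
Inverting a value does not change how many redundant copies of the sign bit it
carries, and AND-ing two values keeps at least the smaller of the two counts, so
returning std::min(Tmp0, Tmp1) is a conservative bound. A minimal standalone scalar
sketch of that argument (illustrative only; names such as numSignBits16 are
hypothetical and this is not LLVM code):

    // Scalar model: ANDNP(X, Y) == (~X) & Y never has fewer redundant
    // sign bits than min(signbits(X), signbits(Y)).
    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Count copies of the sign bit (including the sign bit itself) in a
    // 16-bit value, mirroring what ComputeNumSignBits reports per element.
    static unsigned numSignBits16(int16_t V) {
      const unsigned Bits = (uint16_t)V;
      const unsigned Sign = (Bits >> 15) & 1;
      unsigned N = 1;
      while (N < 16 && (((Bits >> (15 - N)) & 1) == Sign))
        ++N;
      return N;
    }

    int main() {
      const int16_t Vals[] = {0, -1, 1, -2, 0x1234, (int16_t)0xFF80, 0x007F};
      for (int16_t X : Vals)
        for (int16_t Y : Vals) {
          int16_t AndN = (int16_t)(~X & Y); // scalar stand-in for ANDNP
          assert(numSignBits16(AndN) >=
                 std::min(numSignBits16(X), numSignBits16(Y)));
        }
      return 0;
    }

With the extra sign-bit information available through ANDNP, the truncation in the
saddo/ssubo tests below no longer needs the VPSHUFB + VPUNPCKLQDQ byte shuffle and
folds to a single VPACKSSWB, as the updated CHECK lines show.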

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vec_saddo.ll
    llvm/trunk/test/CodeGen/X86/vec_ssubo.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=361270&r1=361269&r2=361270&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue May 21 08:20:24 2019
@@ -31026,6 +31026,15 @@ unsigned X86TargetLowering::ComputeNumSi
     // Vector compares return zero/all-bits result values.
     return VTBits;
 
+  case X86ISD::ANDNP: {
+    unsigned Tmp0 =
+        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Tmp0 == 1) return 1; // Early out.
+    unsigned Tmp1 =
+        DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    return std::min(Tmp0, Tmp1);
+  }
+
   case X86ISD::CMOV: {
     unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
     if (Tmp0 == 1) return 1;  // Early out.

Modified: llvm/trunk/test/CodeGen/X86/vec_saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_saddo.ll?rev=361270&r1=361269&r2=361270&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_saddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_saddo.ll Tue May 21 08:20:24 2019
@@ -823,26 +823,24 @@ define <16 x i32> @saddo_v16i32(<16 x i3
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm5, %xmm4
 ; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
-; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm11
-; AVX1-NEXT:    vpcmpgtd %xmm11, %xmm5, %xmm3
+; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm10
+; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm3
 ; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpandn %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT:    vpshufb %xmm8, %xmm3, %xmm10
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm5, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm7
+; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm1
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm4
-; AVX1-NEXT:    vpaddd %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm7
-; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vpandn %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm1, %xmm7
+; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm4
+; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm4
 ; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm7
@@ -854,10 +852,9 @@ define <16 x i32> @saddo_v16i32(<16 x i3
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
 ; AVX1-NEXT:    vpandn %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpshufb %xmm8, %xmm1, %xmm1
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm10[0]
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm11, %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm10, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
 ; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
@@ -887,22 +884,19 @@ define <16 x i32> @saddo_v16i32(<16 x i3
 ; AVX2-NEXT:    vpandn %ymm5, %ymm1, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm5
 ; AVX2-NEXT:    vpackssdw %xmm5, %xmm1, %xmm1
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm4, %ymm7
+; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm4, %ymm5
+; AVX2-NEXT:    vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm4, %ymm7
 ; AVX2-NEXT:    vpxor %ymm6, %ymm7, %ymm7
-; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm4, %ymm8
-; AVX2-NEXT:    vpxor %ymm6, %ymm8, %ymm8
-; AVX2-NEXT:    vpcmpeqd %ymm7, %ymm8, %ymm7
+; AVX2-NEXT:    vpcmpeqd %ymm5, %ymm7, %ymm5
 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm4, %ymm0
 ; AVX2-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm8, %ymm0
-; AVX2-NEXT:    vpandn %ymm7, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm7, %ymm0
+; AVX2-NEXT:    vpandn %ymm5, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
 ; AVX2-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpshufb %xmm5, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpmovsxbd %xmm1, %ymm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; AVX2-NEXT:    vpmovsxbd %xmm1, %ymm1

Modified: llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ssubo.ll?rev=361270&r1=361269&r2=361270&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ssubo.ll Tue May 21 08:20:24 2019
@@ -856,22 +856,20 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpandn %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT:    vpshufb %xmm8, %xmm3, %xmm11
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm9, %xmm7
-; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm7
-; AVX1-NEXT:    vpsubd %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm3
 ; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpandn %xmm3, %xmm7, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm6
+; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpandn %xmm3, %xmm6, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm7
@@ -884,8 +882,7 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
 ; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
 ; AVX1-NEXT:    vpandn %xmm2, %xmm6, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpshufb %xmm8, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm11[0]
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm10, %ymm1, %ymm4
 ; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
@@ -918,23 +915,20 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
 ; AVX2-NEXT:    vpandn %ymm1, %ymm5, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm5
 ; AVX2-NEXT:    vpackssdw %xmm5, %xmm1, %xmm1
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm4, %ymm7
+; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm4, %ymm5
+; AVX2-NEXT:    vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm4, %ymm7
 ; AVX2-NEXT:    vpxor %ymm6, %ymm7, %ymm7
-; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm4, %ymm8
-; AVX2-NEXT:    vpxor %ymm6, %ymm8, %ymm8
-; AVX2-NEXT:    vpcmpeqd %ymm7, %ymm8, %ymm7
+; AVX2-NEXT:    vpcmpeqd %ymm5, %ymm7, %ymm5
 ; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm4, %ymm0
 ; AVX2-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm8, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm7, %ymm0
 ; AVX2-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX2-NEXT:    vpandn %ymm0, %ymm7, %ymm0
+; AVX2-NEXT:    vpandn %ymm0, %ymm5, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
 ; AVX2-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpshufb %xmm5, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpmovsxbd %xmm1, %ymm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; AVX2-NEXT:    vpmovsxbd %xmm1, %ymm1
