[llvm] 231b876 - [X86][AVX512] Fold not(kmov(x)) -> kmov(not(x)) and not(widen_subvector(x)) -> widen_subvector(not(x))

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 11 12:14:42 PDT 2021


Author: Simon Pilgrim
Date: 2021-04-11T20:07:09+01:00
New Revision: 231b87618bb61b24674d060721f7004057da9336

URL: https://github.com/llvm/llvm-project/commit/231b87618bb61b24674d060721f7004057da9336
DIFF: https://github.com/llvm/llvm-project/commit/231b87618bb61b24674d060721f7004057da9336.diff

LOG: [X86][AVX512] Fold not(kmov(x)) -> kmov(not(x)) and not(widen_subvector(x)) -> widen_subvector(not(x))

Improve AVX512 mask inversion, rG38c799bce801 exposed some missing opportunities to move scalar not() back onto the boolvector types for folding with setcc etc.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/movmsk-cmp.ll
    llvm/test/CodeGen/X86/vector-reduce-and-bool.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0214745f88f3..4ecd9f86322b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46988,6 +46988,28 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
     return RV;
 
+  // Fold not(iX bitcast(vXi1)) -> (iX bitcast(not(vec))) for legal boolvecs.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST &&
+      N0.getOperand(0).getValueType().isVector() &&
+      N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
+      TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) {
+    return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0),
+                                         N0.getOperand(0).getValueType()));
+  }
+
+  // Handle AVX512 mask widening.
+  // Fold not(insert_subvector(undef,sub)) -> insert_subvector(undef,not(sub))
+  if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() &&
+      VT.getVectorElementType() == MVT::i1 &&
+      N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() &&
+      TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
+    return DAG.getNode(
+        ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+        DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()),
+        N0.getOperand(2));
+  }
+
   // Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2))
   // Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2))
   // TODO: Under what circumstances could this be performed in DAGCombine?

diff  --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 3d0b7cd50fcf..30aa8cacd3cd 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1017,9 +1017,8 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
 ; KNL-LABEL: allzeros_v2i64_not:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $3, %al
 ; KNL-NEXT:    setne %al
 ; KNL-NEXT:    vzeroupper
@@ -1838,9 +1837,8 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
 ; KNL-LABEL: allones_v4i32_and1:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT:    vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
+; KNL-NEXT:    vptestnmd {{.*}}(%rip){1to16}, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $15, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -2154,9 +2152,8 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; KNL-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,1]
-; KNL-NEXT:    vptestmq %zmm1, %zmm0, %k0
+; KNL-NEXT:    vptestnmq %zmm1, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $3, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -2252,9 +2249,8 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
 ; KNL-LABEL: allones_v4i64_and1:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT:    vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
+; KNL-NEXT:    vptestnmq {{.*}}(%rip){1to8}, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $15, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -3159,9 +3155,8 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
 ; KNL-LABEL: allones_v4i32_and4:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT:    vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
+; KNL-NEXT:    vptestnmd {{.*}}(%rip){1to16}, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $15, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -3475,9 +3470,8 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; KNL-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4]
-; KNL-NEXT:    vptestmq %zmm1, %zmm0, %k0
+; KNL-NEXT:    vptestnmq %zmm1, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $3, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -3573,9 +3567,8 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
 ; KNL-LABEL: allones_v4i64_and4:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT:    vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
+; KNL-NEXT:    vptestnmq {{.*}}(%rip){1to8}, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $15, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -4018,9 +4011,8 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $-109, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -4029,8 +4021,8 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SKX-LABEL: movmsk_v8i16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
+; SKX-NEXT:    knotb %k0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    notb %al
 ; SKX-NEXT:    testb $-109, %al
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    retq
@@ -4136,9 +4128,8 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; KNL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $3, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper
@@ -4278,8 +4269,8 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
 ; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; KNL-NEXT:    vcmplepd %zmm0, %zmm1, %k0
+; KNL-NEXT:    knotw %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    notb %al
 ; KNL-NEXT:    testb $3, %al
 ; KNL-NEXT:    sete %al
 ; KNL-NEXT:    vzeroupper

diff  --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 4e0410f97346..50aad826e730 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -31,9 +31,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-LABEL: trunc_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpsllq $63, %xmm0, %xmm0
-; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    notb %al
 ; AVX512F-NEXT:    testb $3, %al
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    vzeroupper
@@ -42,9 +41,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-LABEL: trunc_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
-; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    notb %al
 ; AVX512BW-NEXT:    testb $3, %al
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    vzeroupper
@@ -53,9 +51,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
 ; AVX512VL-LABEL: trunc_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vpsllq $63, %xmm0, %xmm0
-; AVX512VL-NEXT:    vptestmq %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    notb %al
 ; AVX512VL-NEXT:    testb $3, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
@@ -84,9 +81,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-LABEL: trunc_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    notb %al
 ; AVX512F-NEXT:    testb $15, %al
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    vzeroupper
@@ -95,9 +91,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-LABEL: trunc_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    notb %al
 ; AVX512BW-NEXT:    testb $15, %al
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    vzeroupper
@@ -106,9 +101,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
 ; AVX512VL-LABEL: trunc_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    notb %al
 ; AVX512VL-NEXT:    testb $15, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
@@ -244,9 +238,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-LABEL: trunc_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpsllq $63, %ymm0, %ymm0
-; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    notb %al
 ; AVX512F-NEXT:    testb $15, %al
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    vzeroupper
@@ -255,9 +248,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-LABEL: trunc_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpsllq $63, %ymm0, %ymm0
-; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    notb %al
 ; AVX512BW-NEXT:    testb $15, %al
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    vzeroupper
@@ -266,9 +258,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
 ; AVX512VL-LABEL: trunc_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vpsllq $63, %ymm0, %ymm0
-; AVX512VL-NEXT:    vptestmq %ymm0, %ymm0, %k0
+; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    notb %al
 ; AVX512VL-NEXT:    testb $15, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    vzeroupper
@@ -875,9 +866,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-LABEL: icmp_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    notb %al
 ; AVX512F-NEXT:    testb $3, %al
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    vzeroupper
@@ -886,9 +876,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-LABEL: icmp_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    notb %al
 ; AVX512BW-NEXT:    testb $3, %al
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    vzeroupper
@@ -931,9 +920,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-LABEL: icmp_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    notb %al
 ; AVX512F-NEXT:    testb $15, %al
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    vzeroupper
@@ -942,9 +930,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-LABEL: icmp_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    notb %al
 ; AVX512BW-NEXT:    testb $15, %al
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    vzeroupper
@@ -1110,9 +1097,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-LABEL: icmp_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    notb %al
 ; AVX512F-NEXT:    testb $15, %al
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    vzeroupper
@@ -1121,9 +1107,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-LABEL: icmp_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    notb %al
 ; AVX512BW-NEXT:    testb $15, %al
 ; AVX512BW-NEXT:    sete %al
 ; AVX512BW-NEXT:    vzeroupper


        


More information about the llvm-commits mailing list