[llvm] aefe8f2 - [DAG] Fold vXi1 multiplies -> and

Fri Feb 26 03:46:26 PST 2021

Author: Simon Pilgrim
Date: 2021-02-26T11:46:12Z
New Revision: aefe8f2f6c9e61b7bee92924b381321b4b5053ec

URL: https://github.com/llvm/llvm-project/commit/aefe8f2f6c9e61b7bee92924b381321b4b5053ec
DIFF: https://github.com/llvm/llvm-project/commit/aefe8f2f6c9e61b7bee92924b381321b4b5053ec.diff

LOG: [DAG] Fold vXi1 multiplies -> and

This allows us to remove X86 custom lowering of vXi1 MUL, which helps simplify a load of mask math.

Mentioned in D97478 post review.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-mask-op.ll
    llvm/test/CodeGen/X86/avx512bw-mask-op.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1538d2ac240d..a571ee35b388 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5323,6 +5323,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(VT.isInteger() && "This operator does not apply to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
+    if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+      return getNode(ISD::AND, DL, VT, N1, N2);
     if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
       const APInt &MulImm = N1->getConstantOperandAPInt(0);
       const APInt &N2CImm = N2C->getAPIntValue();

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9aa76d988a20..b35d9bb5d498 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1468,7 +1468,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
-      setOperationAction(ISD::MUL,              VT, Custom);
       setOperationAction(ISD::UADDSAT,          VT, Custom);
       setOperationAction(ISD::SADDSAT,          VT, Custom);
       setOperationAction(ISD::USUBSAT,          VT, Custom);
@@ -1851,7 +1850,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
 
     for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
-      setOperationAction(ISD::MUL,                VT, Custom);
       setOperationAction(ISD::VSELECT,            VT, Expand);
       setOperationAction(ISD::UADDSAT,            VT, Custom);
       setOperationAction(ISD::SADDSAT,            VT, Custom);
@@ -27261,9 +27259,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
   SDLoc dl(Op);
   MVT VT = Op.getSimpleValueType();
 
-  if (VT.getScalarType() == MVT::i1)
-    return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
-
   // Decompose 256-bit ops into 128-bit ops.
   if (VT.is256BitVector() && !Subtarget.hasInt256())
     return splitVectorIntBinary(Op, DAG);

diff  --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index a6fe9396f821..c9f29a689086 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -3844,49 +3844,17 @@ define i16 @test_v16i1_sub(i16 %x, i16 %y) {
 }
 
 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
-; KNL-LABEL: test_v16i1_mul:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    kmovw %esi, %k1
-; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test_v16i1_mul:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    kmovd %edi, %k0
-; SKX-NEXT:    kmovd %esi, %k1
-; SKX-NEXT:    kandw %k1, %k0, %k0
-; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: test_v16i1_mul:
-; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    kmovd %edi, %k0
-; AVX512BW-NEXT:    kmovd %esi, %k1
-; AVX512BW-NEXT:    kandw %k1, %k0, %k0
-; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: test_v16i1_mul:
-; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    kmovw %edi, %k0
-; AVX512DQ-NEXT:    kmovw %esi, %k1
-; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    kmovw %k0, %eax
-; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: test_v16i1_mul:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: test_v16i1_mul:
 ; X86:       ## %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    kandw %k1, %k0, %k0
-; X86-NEXT:    kmovd %k0, %eax
-; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    retl
   %m0 = bitcast i16 %x to <16 x i1>
   %m1 = bitcast i16 %y to <16 x i1>
@@ -3936,49 +3904,17 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) {
 }
 
 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
-; KNL-LABEL: test_v8i1_mul:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    kmovw %esi, %k1
-; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    ## kill: def $al killed $al killed $eax
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test_v8i1_mul:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    kmovd %edi, %k0
-; SKX-NEXT:    kmovd %esi, %k1
-; SKX-NEXT:    kandb %k1, %k0, %k0
-; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    ## kill: def $al killed $al killed $eax
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: test_v8i1_mul:
-; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    kmovd %edi, %k0
-; AVX512BW-NEXT:    kmovd %esi, %k1
-; AVX512BW-NEXT:    kandw %k1, %k0, %k0
-; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: test_v8i1_mul:
-; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    kmovw %edi, %k0
-; AVX512DQ-NEXT:    kmovw %esi, %k1
-; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
-; AVX512DQ-NEXT:    kmovw %k0, %eax
-; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: test_v8i1_mul:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: test_v8i1_mul:
 ; X86:       ## %bb.0:
-; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    kandb %k1, %k0, %k0
-; X86-NEXT:    kmovd %k0, %eax
-; X86-NEXT:    ## kill: def $al killed $al killed $eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i8 %x to <8 x i1>
   %m1 = bitcast i8 %y to <8 x i1>
@@ -5141,78 +5077,17 @@ define i1 @test_v1i1_sub(i1 %x, i1 %y) {
 }
 
 define i1 @test_v1i1_mul(i1 %x, i1 %y) {
-; KNL-LABEL: test_v1i1_mul:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    kmovw %esi, %k1
-; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kshiftlw $15, %k0, %k0
-; KNL-NEXT:    kshiftrw $15, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
-; KNL-NEXT:    movb -{{[0-9]+}}(%rsp), %al
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test_v1i1_mul:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT:    andl $1, %esi
-; SKX-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
-; SKX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k0
-; SKX-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
-; SKX-NEXT:    kandw %k1, %k0, %k0
-; SKX-NEXT:    kshiftlb $7, %k0, %k0
-; SKX-NEXT:    kshiftrb $7, %k0, %k0
-; SKX-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
-; SKX-NEXT:    movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: test_v1i1_mul:
-; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    kmovd %edi, %k0
-; AVX512BW-NEXT:    kmovd %esi, %k1
-; AVX512BW-NEXT:    kandw %k1, %k0, %k0
-; AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
-; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT:    movb -{{[0-9]+}}(%rsp), %al
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: test_v1i1_mul:
-; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    andl $1, %edi
-; AVX512DQ-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT:    andl $1, %esi
-; AVX512DQ-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k0
-; AVX512DQ-NEXT:    kmovb -{{[0-9]+}}(%rsp), %k1
-; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT:    kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT:    kmovb %k0, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT:    movb -{{[0-9]+}}(%rsp), %al
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: test_v1i1_mul:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: test_v1i1_mul:
 ; X86:       ## %bb.0:
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    andb $1, %al
-; X86-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    andb $1, %al
-; X86-NEXT:    movb %al, {{[0-9]+}}(%esp)
-; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
-; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    kandw %k1, %k0, %k0
-; X86-NEXT:    kshiftlb $7, %k0, %k0
-; X86-NEXT:    kshiftrb $7, %k0, %k0
-; X86-NEXT:    kmovb %k0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    popl %ecx
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i1 %x to <1 x i1>
   %m1 = bitcast i1 %y to <1 x i1>

diff  --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index 139a27e83154..92c126475fc5 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -178,10 +178,8 @@ define i32 @test_v32i1_sub(i32 %x, i32 %y) {
 define i32 @test_v32i1_mul(i32 %x, i32 %y) {
 ; CHECK-LABEL: test_v32i1_mul:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k0
-; CHECK-NEXT:    kmovd %esi, %k1
-; CHECK-NEXT:    kandd %k1, %k0, %k0
-; CHECK-NEXT:    kmovd %k0, %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl %esi, %eax
 ; CHECK-NEXT:    retq
   %m0 = bitcast i32 %x to <32 x i1>
   %m1 = bitcast i32 %y to <32 x i1>
@@ -219,10 +217,8 @@ define i64 @test_v64i1_sub(i64 %x, i64 %y) {
 define i64 @test_v64i1_mul(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_v64i1_mul:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovq %rdi, %k0
-; CHECK-NEXT:    kmovq %rsi, %k1
-; CHECK-NEXT:    kandq %k1, %k0, %k0
-; CHECK-NEXT:    kmovq %k0, %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andq %rsi, %rax
 ; CHECK-NEXT:    retq
   %m0 = bitcast i64 %x to <64 x i1>
   %m1 = bitcast i64 %y to <64 x i1>