[llvm] 5b82543 - [DAGCombiner] Optimize 1-bit smulo to AND+SETNE.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 13 09:48:22 PST 2021


Author: Craig Topper
Date: 2021-03-13T09:39:36-08:00
New Revision: 5b825433d7854f2e79966606ddfb381806cc499c

URL: https://github.com/llvm/llvm-project/commit/5b825433d7854f2e79966606ddfb381806cc499c
DIFF: https://github.com/llvm/llvm-project/commit/5b825433d7854f2e79966606ddfb381806cc499c.diff

LOG: [DAGCombiner] Optimize 1-bit smulo to AND+SETNE.

A 1-bit smulo overflows if both inputs are -1, since the result
should be +1, which can't be represented in a signed 1-bit value.

We can detect this with an AND and a setcc. The multiply result
can also use the same AND.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D97634

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/vec_smulo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 273b7ced3977..120c7f244c6a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4661,6 +4661,14 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
                        N->getVTList(), N0, N0);
 
   if (IsSigned) {
+    // A 1 bit SMULO overflows if both inputs are 1.
+    if (VT.getScalarSizeInBits() == 1) {
+      SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
+      return CombineTo(N, And,
+                       DAG.getSetCC(DL, CarryVT, And,
+                                    DAG.getConstant(0, DL, VT), ISD::SETNE));
+    }
+
     // Multiplying n * m significant bits yields a result of n + m significant
     // bits. If the total number of significant bits does not exceed the
     // result bit width (minus 1), there is no overflow.

diff  --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index f9e56f7c7f5c..4fa3367521a5 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -3561,304 +3561,58 @@ define <4 x i32> @smulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
 define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind {
 ; SSE2-LABEL: smulo_v4i1:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    psrad $31, %xmm1
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    pslld $31, %xmm0
+; SSE2-NEXT:    movmskps %xmm0, %eax
 ; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT:    pmuludq %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT:    pmuludq %xmm2, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    movmskps %xmm1, %eax
-; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: smulo_v4i1:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pslld $31, %xmm1
-; SSSE3-NEXT:    psrad $31, %xmm1
+; SSSE3-NEXT:    pand %xmm1, %xmm0
 ; SSSE3-NEXT:    pslld $31, %xmm0
+; SSSE3-NEXT:    movmskps %xmm0, %eax
 ; SSSE3-NEXT:    psrad $31, %xmm0
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSSE3-NEXT:    pmuludq %xmm1, %xmm0
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSSE3-NEXT:    pmuludq %xmm2, %xmm1
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    pslld $31, %xmm1
-; SSSE3-NEXT:    movmskps %xmm1, %eax
-; SSSE3-NEXT:    psrad $31, %xmm1
-; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSSE3-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSSE3-NEXT:    pxor %xmm1, %xmm0
 ; SSSE3-NEXT:    movb %al, (%rdi)
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: smulo_v4i1:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pslld $31, %xmm1
-; SSE41-NEXT:    psrad $31, %xmm1
+; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pslld $31, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
 ; SSE41-NEXT:    psrad $31, %xmm0
-; SSE41-NEXT:    pmulld %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    pslld $31, %xmm1
-; SSE41-NEXT:    movmskps %xmm1, %eax
-; SSE41-NEXT:    psrad $31, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT:    pxor %xmm0, %xmm1
 ; SSE41-NEXT:    movb %al, (%rdi)
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: smulo_v4i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpslld $31, %xmm1, %xmm1
-; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
 ; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
-; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpslld $31, %xmm0, %xmm1
-; AVX-NEXT:    vpsrad $31, %xmm1, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm1, %eax
 ; AVX-NEXT:    movb %al, (%rdi)
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: smulo_v4i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    pushq %rbx
+; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
 ; AVX512F-NEXT:    vptestmd %xmm0, %xmm0, %k0
-; AVX512F-NEXT:    kshiftrw $3, %k0, %k1
-; AVX512F-NEXT:    kmovw %k1, %r8d
-; AVX512F-NEXT:    andb $1, %r8b
-; AVX512F-NEXT:    negb %r8b
-; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm0
-; AVX512F-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; AVX512F-NEXT:    kshiftrw $3, %k1, %k2
-; AVX512F-NEXT:    kmovw %k2, %r9d
-; AVX512F-NEXT:    andb $1, %r9b
-; AVX512F-NEXT:    negb %r9b
-; AVX512F-NEXT:    kshiftrw $2, %k0, %k2
-; AVX512F-NEXT:    kmovw %k2, %r10d
-; AVX512F-NEXT:    andb $1, %r10b
-; AVX512F-NEXT:    negb %r10b
-; AVX512F-NEXT:    kshiftrw $2, %k1, %k2
-; AVX512F-NEXT:    kmovw %k2, %ebx
-; AVX512F-NEXT:    andb $1, %bl
-; AVX512F-NEXT:    negb %bl
-; AVX512F-NEXT:    kshiftrw $1, %k0, %k2
-; AVX512F-NEXT:    kmovw %k2, %ecx
-; AVX512F-NEXT:    andb $1, %cl
-; AVX512F-NEXT:    negb %cl
-; AVX512F-NEXT:    kshiftrw $1, %k1, %k2
-; AVX512F-NEXT:    kmovw %k2, %esi
-; AVX512F-NEXT:    andb $1, %sil
-; AVX512F-NEXT:    negb %sil
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andb $1, %al
-; AVX512F-NEXT:    negb %al
-; AVX512F-NEXT:    kmovw %k1, %edx
-; AVX512F-NEXT:    andb $1, %dl
-; AVX512F-NEXT:    negb %dl
-; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512F-NEXT:    mulb %dl
-; AVX512F-NEXT:    movl %eax, %r11d
-; AVX512F-NEXT:    andb $1, %al
-; AVX512F-NEXT:    negb %al
-; AVX512F-NEXT:    cmpb %r11b, %al
-; AVX512F-NEXT:    setne %al
-; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    movw $-3, %ax
-; AVX512F-NEXT:    kmovw %eax, %k0
-; AVX512F-NEXT:    kandw %k0, %k1, %k1
-; AVX512F-NEXT:    movl %ecx, %eax
-; AVX512F-NEXT:    mulb %sil
-; AVX512F-NEXT:    movl %eax, %ecx
-; AVX512F-NEXT:    andb $1, %al
-; AVX512F-NEXT:    negb %al
-; AVX512F-NEXT:    cmpb %cl, %al
-; AVX512F-NEXT:    setne %al
-; AVX512F-NEXT:    kmovw %eax, %k2
-; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512F-NEXT:    kshiftrw $14, %k2, %k2
-; AVX512F-NEXT:    korw %k2, %k1, %k2
-; AVX512F-NEXT:    movw $-5, %ax
-; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    kandw %k1, %k2, %k2
-; AVX512F-NEXT:    movl %r10d, %eax
-; AVX512F-NEXT:    mulb %bl
-; AVX512F-NEXT:    movl %eax, %edx
-; AVX512F-NEXT:    andb $1, %al
-; AVX512F-NEXT:    negb %al
-; AVX512F-NEXT:    cmpb %dl, %al
-; AVX512F-NEXT:    setne %al
-; AVX512F-NEXT:    kmovw %eax, %k3
-; AVX512F-NEXT:    kshiftlw $2, %k3, %k3
-; AVX512F-NEXT:    korw %k3, %k2, %k2
-; AVX512F-NEXT:    kshiftlw $13, %k2, %k2
-; AVX512F-NEXT:    kshiftrw $13, %k2, %k2
-; AVX512F-NEXT:    movl %r8d, %eax
-; AVX512F-NEXT:    mulb %r9b
-; AVX512F-NEXT:    # kill: def $al killed $al def $eax
-; AVX512F-NEXT:    movl %eax, %ebx
-; AVX512F-NEXT:    andb $1, %bl
-; AVX512F-NEXT:    negb %bl
-; AVX512F-NEXT:    cmpb %al, %bl
-; AVX512F-NEXT:    setne %sil
-; AVX512F-NEXT:    kmovw %esi, %k3
-; AVX512F-NEXT:    kshiftlw $3, %k3, %k3
-; AVX512F-NEXT:    korw %k3, %k2, %k2
-; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k2} {z}
-; AVX512F-NEXT:    andl $1, %r11d
-; AVX512F-NEXT:    kmovw %r11d, %k2
-; AVX512F-NEXT:    kandw %k0, %k2, %k0
-; AVX512F-NEXT:    kmovw %ecx, %k2
-; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512F-NEXT:    kshiftrw $14, %k2, %k2
-; AVX512F-NEXT:    korw %k2, %k0, %k0
-; AVX512F-NEXT:    kandw %k1, %k0, %k0
-; AVX512F-NEXT:    kmovw %edx, %k1
-; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512F-NEXT:    kshiftrw $13, %k1, %k1
-; AVX512F-NEXT:    korw %k1, %k0, %k0
-; AVX512F-NEXT:    movw $-9, %cx
-; AVX512F-NEXT:    kmovw %ecx, %k1
-; AVX512F-NEXT:    kandw %k1, %k0, %k0
-; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512F-NEXT:    kshiftrw $12, %k1, %k1
-; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    movb %al, (%rdi)
-; AVX512F-NEXT:    popq %rbx
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: smulo_v4i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    pushq %rbx
+; AVX512BW-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vptestmd %xmm0, %xmm0, %k0
-; AVX512BW-NEXT:    kshiftrw $3, %k0, %k1
-; AVX512BW-NEXT:    kmovd %k1, %r8d
-; AVX512BW-NEXT:    andb $1, %r8b
-; AVX512BW-NEXT:    negb %r8b
-; AVX512BW-NEXT:    vpslld $31, %xmm1, %xmm0
-; AVX512BW-NEXT:    vptestmd %xmm0, %xmm0, %k1
-; AVX512BW-NEXT:    kshiftrw $3, %k1, %k2
-; AVX512BW-NEXT:    kmovd %k2, %r9d
-; AVX512BW-NEXT:    andb $1, %r9b
-; AVX512BW-NEXT:    negb %r9b
-; AVX512BW-NEXT:    kshiftrw $2, %k0, %k2
-; AVX512BW-NEXT:    kmovd %k2, %r10d
-; AVX512BW-NEXT:    andb $1, %r10b
-; AVX512BW-NEXT:    negb %r10b
-; AVX512BW-NEXT:    kshiftrw $2, %k1, %k2
-; AVX512BW-NEXT:    kmovd %k2, %ebx
-; AVX512BW-NEXT:    andb $1, %bl
-; AVX512BW-NEXT:    negb %bl
-; AVX512BW-NEXT:    kshiftrw $1, %k0, %k2
-; AVX512BW-NEXT:    kmovd %k2, %ecx
-; AVX512BW-NEXT:    andb $1, %cl
-; AVX512BW-NEXT:    negb %cl
-; AVX512BW-NEXT:    kshiftrw $1, %k1, %k2
-; AVX512BW-NEXT:    kmovd %k2, %esi
-; AVX512BW-NEXT:    andb $1, %sil
-; AVX512BW-NEXT:    negb %sil
-; AVX512BW-NEXT:    kmovd %k0, %eax
-; AVX512BW-NEXT:    andb $1, %al
-; AVX512BW-NEXT:    negb %al
-; AVX512BW-NEXT:    kmovd %k1, %edx
-; AVX512BW-NEXT:    andb $1, %dl
-; AVX512BW-NEXT:    negb %dl
-; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512BW-NEXT:    mulb %dl
-; AVX512BW-NEXT:    movl %eax, %r11d
-; AVX512BW-NEXT:    andb $1, %al
-; AVX512BW-NEXT:    negb %al
-; AVX512BW-NEXT:    cmpb %r11b, %al
-; AVX512BW-NEXT:    setne %al
-; AVX512BW-NEXT:    kmovd %eax, %k1
-; AVX512BW-NEXT:    movw $-3, %ax
-; AVX512BW-NEXT:    kmovd %eax, %k0
-; AVX512BW-NEXT:    kandw %k0, %k1, %k1
-; AVX512BW-NEXT:    movl %ecx, %eax
-; AVX512BW-NEXT:    mulb %sil
-; AVX512BW-NEXT:    movl %eax, %ecx
-; AVX512BW-NEXT:    andb $1, %al
-; AVX512BW-NEXT:    negb %al
-; AVX512BW-NEXT:    cmpb %cl, %al
-; AVX512BW-NEXT:    setne %al
-; AVX512BW-NEXT:    kmovd %eax, %k2
-; AVX512BW-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512BW-NEXT:    kshiftrw $14, %k2, %k2
-; AVX512BW-NEXT:    korw %k2, %k1, %k2
-; AVX512BW-NEXT:    movw $-5, %ax
-; AVX512BW-NEXT:    kmovd %eax, %k1
-; AVX512BW-NEXT:    kandw %k1, %k2, %k2
-; AVX512BW-NEXT:    movl %r10d, %eax
-; AVX512BW-NEXT:    mulb %bl
-; AVX512BW-NEXT:    movl %eax, %edx
-; AVX512BW-NEXT:    andb $1, %al
-; AVX512BW-NEXT:    negb %al
-; AVX512BW-NEXT:    cmpb %dl, %al
-; AVX512BW-NEXT:    setne %al
-; AVX512BW-NEXT:    kmovd %eax, %k3
-; AVX512BW-NEXT:    kshiftlw $2, %k3, %k3
-; AVX512BW-NEXT:    korw %k3, %k2, %k2
-; AVX512BW-NEXT:    kshiftlw $13, %k2, %k2
-; AVX512BW-NEXT:    kshiftrw $13, %k2, %k2
-; AVX512BW-NEXT:    movl %r8d, %eax
-; AVX512BW-NEXT:    mulb %r9b
-; AVX512BW-NEXT:    # kill: def $al killed $al def $eax
-; AVX512BW-NEXT:    movl %eax, %ebx
-; AVX512BW-NEXT:    andb $1, %bl
-; AVX512BW-NEXT:    negb %bl
-; AVX512BW-NEXT:    cmpb %al, %bl
-; AVX512BW-NEXT:    setne %sil
-; AVX512BW-NEXT:    kmovd %esi, %k3
-; AVX512BW-NEXT:    kshiftlw $3, %k3, %k3
-; AVX512BW-NEXT:    korw %k3, %k2, %k2
-; AVX512BW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k2} {z}
-; AVX512BW-NEXT:    andl $1, %r11d
-; AVX512BW-NEXT:    kmovw %r11d, %k2
-; AVX512BW-NEXT:    kandw %k0, %k2, %k0
-; AVX512BW-NEXT:    kmovd %ecx, %k2
-; AVX512BW-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512BW-NEXT:    kshiftrw $14, %k2, %k2
-; AVX512BW-NEXT:    korw %k2, %k0, %k0
-; AVX512BW-NEXT:    kandw %k1, %k0, %k0
-; AVX512BW-NEXT:    kmovd %edx, %k1
-; AVX512BW-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT:    kshiftrw $13, %k1, %k1
-; AVX512BW-NEXT:    korw %k1, %k0, %k0
-; AVX512BW-NEXT:    movw $-9, %cx
-; AVX512BW-NEXT:    kmovd %ecx, %k1
-; AVX512BW-NEXT:    kandw %k1, %k0, %k0
-; AVX512BW-NEXT:    kmovd %eax, %k1
-; AVX512BW-NEXT:    kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT:    kshiftrw $12, %k1, %k1
-; AVX512BW-NEXT:    korw %k1, %k0, %k0
+; AVX512BW-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    movb %al, (%rdi)
-; AVX512BW-NEXT:    popq %rbx
 ; AVX512BW-NEXT:    retq
   %t = call {<4 x i1>, <4 x i1>} @llvm.smul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
   %val = extractvalue {<4 x i1>, <4 x i1>} %t, 0


        


More information about the llvm-commits mailing list