[llvm] ca5247b - [DAGCombiner] Don't skip no overflow check on UMULO if the first computeKnownBits call doesn't return any 0 bits.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 28 08:32:34 PST 2021
Author: Craig Topper
Date: 2021-02-28T08:26:22-08:00
New Revision: ca5247bb1770a1dfa56b78303d99f1cc9a0a06ee
URL: https://github.com/llvm/llvm-project/commit/ca5247bb1770a1dfa56b78303d99f1cc9a0a06ee
DIFF: https://github.com/llvm/llvm-project/commit/ca5247bb1770a1dfa56b78303d99f1cc9a0a06ee.diff
LOG: [DAGCombiner] Don't skip no overflow check on UMULO if the first computeKnownBits call doesn't return any 0 bits.
Even if the first computeKnownBits call doesn't report any known-zero
bits, it is possible that the other operand has bitwidth-1 leading zeros.
In that case overflow is still impossible, so always call computeKnownBits
for both operands.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/vec_umulo.ll
llvm/test/CodeGen/X86/vec_umulo.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 686c7a47b352..01b98737f939 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4672,14 +4672,12 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
} else {
KnownBits N1Known = DAG.computeKnownBits(N1);
- if (N1Known.Zero.getBoolValue()) {
- KnownBits N0Known = DAG.computeKnownBits(N0);
- bool Overflow;
- (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
- if (!Overflow)
- return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
- }
+ KnownBits N0Known = DAG.computeKnownBits(N0);
+ bool Overflow;
+ (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
+ if (!Overflow)
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
}
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
index c84c76f7f88d..d703e7638292 100644
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -291,23 +291,18 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind {
; CHECK-LABEL: umulo_v4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: mul v1.4h, v0.4h, v1.4h
-; CHECK-NEXT: umov w9, v1.h[1]
-; CHECK-NEXT: umov w8, v1.h[0]
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: umov w9, v0.h[1]
+; CHECK-NEXT: umov w8, v0.h[0]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: bfi w8, w9, #1, #1
-; CHECK-NEXT: umov w9, v1.h[2]
+; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: and w9, w9, #0x1
-; CHECK-NEXT: ushr v0.4h, v1.4h, #1
; CHECK-NEXT: bfi w8, w9, #2, #1
-; CHECK-NEXT: umov w9, v1.h[3]
-; CHECK-NEXT: cmtst v0.4h, v0.4h, v0.4h
+; CHECK-NEXT: umov w9, v0.h[3]
; CHECK-NEXT: bfi w8, w9, #3, #29
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: and w8, w8, #0xf
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 7f9f39419954..5d29e20888a0 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -3172,240 +3172,40 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind {
; SSE-LABEL: umulo_v4i1:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pmaddwd %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $1, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: movb %al, (%rdi)
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: umulo_v4i1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX1-NEXT: vmovmskps %xmm1, %eax
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: umulo_v4i1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpsrld $1, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX2-NEXT: vmovmskps %xmm1, %eax
-; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: retq
+; AVX-LABEL: umulo_v4i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: movb %al, (%rdi)
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: umulo_v4i1:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbx
+; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512F-NEXT: kshiftrw $3, %k0, %k1
-; AVX512F-NEXT: kmovw %k1, %r8d
-; AVX512F-NEXT: andb $1, %r8b
-; AVX512F-NEXT: vpslld $31, %xmm1, %xmm0
-; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k1
-; AVX512F-NEXT: kshiftrw $3, %k1, %k2
-; AVX512F-NEXT: kmovw %k2, %r9d
-; AVX512F-NEXT: andb $1, %r9b
-; AVX512F-NEXT: kshiftrw $2, %k0, %k2
-; AVX512F-NEXT: kmovw %k2, %r10d
-; AVX512F-NEXT: andb $1, %r10b
-; AVX512F-NEXT: kshiftrw $2, %k1, %k2
-; AVX512F-NEXT: kmovw %k2, %r11d
-; AVX512F-NEXT: andb $1, %r11b
-; AVX512F-NEXT: kshiftrw $1, %k0, %k2
-; AVX512F-NEXT: kmovw %k2, %ecx
-; AVX512F-NEXT: andb $1, %cl
-; AVX512F-NEXT: kshiftrw $1, %k1, %k2
-; AVX512F-NEXT: kmovw %k2, %edx
-; AVX512F-NEXT: andb $1, %dl
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: andb $1, %al
-; AVX512F-NEXT: kmovw %k1, %esi
-; AVX512F-NEXT: andb $1, %sil
-; AVX512F-NEXT: movw $-3, %bx
-; AVX512F-NEXT: kmovw %ebx, %k0
-; AVX512F-NEXT: # kill: def $al killed $al killed $eax
-; AVX512F-NEXT: mulb %sil
-; AVX512F-NEXT: movl %eax, %esi
-; AVX512F-NEXT: testb $2, %al
-; AVX512F-NEXT: setne %al
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: kandw %k0, %k1, %k1
-; AVX512F-NEXT: movl %ecx, %eax
-; AVX512F-NEXT: mulb %dl
-; AVX512F-NEXT: movl %eax, %ecx
-; AVX512F-NEXT: testb $2, %al
-; AVX512F-NEXT: setne %al
-; AVX512F-NEXT: kmovw %eax, %k2
-; AVX512F-NEXT: kshiftlw $15, %k2, %k2
-; AVX512F-NEXT: kshiftrw $14, %k2, %k2
-; AVX512F-NEXT: korw %k2, %k1, %k2
-; AVX512F-NEXT: movw $-5, %ax
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: kandw %k1, %k2, %k2
-; AVX512F-NEXT: movl %r10d, %eax
-; AVX512F-NEXT: mulb %r11b
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: testb $2, %al
-; AVX512F-NEXT: setne %al
-; AVX512F-NEXT: kmovw %eax, %k3
-; AVX512F-NEXT: kshiftlw $2, %k3, %k3
-; AVX512F-NEXT: korw %k3, %k2, %k2
-; AVX512F-NEXT: kshiftlw $13, %k2, %k2
-; AVX512F-NEXT: kshiftrw $13, %k2, %k2
-; AVX512F-NEXT: movl %r8d, %eax
-; AVX512F-NEXT: mulb %r9b
-; AVX512F-NEXT: # kill: def $al killed $al def $eax
-; AVX512F-NEXT: testb $2, %al
-; AVX512F-NEXT: setne %bl
-; AVX512F-NEXT: kmovw %ebx, %k3
-; AVX512F-NEXT: kshiftlw $3, %k3, %k3
-; AVX512F-NEXT: korw %k3, %k2, %k2
-; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
-; AVX512F-NEXT: andl $1, %esi
-; AVX512F-NEXT: kmovw %esi, %k2
-; AVX512F-NEXT: kandw %k0, %k2, %k0
-; AVX512F-NEXT: kmovw %ecx, %k2
-; AVX512F-NEXT: kshiftlw $15, %k2, %k2
-; AVX512F-NEXT: kshiftrw $14, %k2, %k2
-; AVX512F-NEXT: korw %k2, %k0, %k0
-; AVX512F-NEXT: kandw %k1, %k0, %k0
-; AVX512F-NEXT: kmovw %edx, %k1
-; AVX512F-NEXT: kshiftlw $15, %k1, %k1
-; AVX512F-NEXT: kshiftrw $13, %k1, %k1
-; AVX512F-NEXT: korw %k1, %k0, %k0
-; AVX512F-NEXT: movw $-9, %cx
-; AVX512F-NEXT: kmovw %ecx, %k1
-; AVX512F-NEXT: kandw %k1, %k0, %k0
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: kshiftlw $15, %k1, %k1
-; AVX512F-NEXT: kshiftrw $12, %k1, %k1
-; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movb %al, (%rdi)
-; AVX512F-NEXT: popq %rbx
+; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: umulo_v4i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: pushq %rbx
+; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512BW-NEXT: kshiftrw $3, %k0, %k1
-; AVX512BW-NEXT: kmovd %k1, %r8d
-; AVX512BW-NEXT: andb $1, %r8b
-; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
-; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k1
-; AVX512BW-NEXT: kshiftrw $3, %k1, %k2
-; AVX512BW-NEXT: kmovd %k2, %r9d
-; AVX512BW-NEXT: andb $1, %r9b
-; AVX512BW-NEXT: kshiftrw $2, %k0, %k2
-; AVX512BW-NEXT: kmovd %k2, %r10d
-; AVX512BW-NEXT: andb $1, %r10b
-; AVX512BW-NEXT: kshiftrw $2, %k1, %k2
-; AVX512BW-NEXT: kmovd %k2, %r11d
-; AVX512BW-NEXT: andb $1, %r11b
-; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
-; AVX512BW-NEXT: kmovd %k2, %ecx
-; AVX512BW-NEXT: andb $1, %cl
-; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
-; AVX512BW-NEXT: kmovd %k2, %edx
-; AVX512BW-NEXT: andb $1, %dl
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: andb $1, %al
-; AVX512BW-NEXT: kmovd %k1, %esi
-; AVX512BW-NEXT: andb $1, %sil
-; AVX512BW-NEXT: movw $-3, %bx
-; AVX512BW-NEXT: kmovd %ebx, %k0
-; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: mulb %sil
-; AVX512BW-NEXT: movl %eax, %esi
-; AVX512BW-NEXT: testb $2, %al
-; AVX512BW-NEXT: setne %al
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: kandw %k0, %k1, %k1
-; AVX512BW-NEXT: movl %ecx, %eax
-; AVX512BW-NEXT: mulb %dl
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: testb $2, %al
-; AVX512BW-NEXT: setne %al
-; AVX512BW-NEXT: kmovd %eax, %k2
-; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
-; AVX512BW-NEXT: kshiftrw $14, %k2, %k2
-; AVX512BW-NEXT: korw %k2, %k1, %k2
-; AVX512BW-NEXT: movw $-5, %ax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: kandw %k1, %k2, %k2
-; AVX512BW-NEXT: movl %r10d, %eax
-; AVX512BW-NEXT: mulb %r11b
-; AVX512BW-NEXT: movl %eax, %edx
-; AVX512BW-NEXT: testb $2, %al
-; AVX512BW-NEXT: setne %al
-; AVX512BW-NEXT: kmovd %eax, %k3
-; AVX512BW-NEXT: kshiftlw $2, %k3, %k3
-; AVX512BW-NEXT: korw %k3, %k2, %k2
-; AVX512BW-NEXT: kshiftlw $13, %k2, %k2
-; AVX512BW-NEXT: kshiftrw $13, %k2, %k2
-; AVX512BW-NEXT: movl %r8d, %eax
-; AVX512BW-NEXT: mulb %r9b
-; AVX512BW-NEXT: # kill: def $al killed $al def $eax
-; AVX512BW-NEXT: testb $2, %al
-; AVX512BW-NEXT: setne %bl
-; AVX512BW-NEXT: kmovd %ebx, %k3
-; AVX512BW-NEXT: kshiftlw $3, %k3, %k3
-; AVX512BW-NEXT: korw %k3, %k2, %k2
-; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
-; AVX512BW-NEXT: andl $1, %esi
-; AVX512BW-NEXT: kmovw %esi, %k2
-; AVX512BW-NEXT: kandw %k0, %k2, %k0
-; AVX512BW-NEXT: kmovd %ecx, %k2
-; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
-; AVX512BW-NEXT: kshiftrw $14, %k2, %k2
-; AVX512BW-NEXT: korw %k2, %k0, %k0
-; AVX512BW-NEXT: kandw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %edx, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $13, %k1, %k1
-; AVX512BW-NEXT: korw %k1, %k0, %k0
-; AVX512BW-NEXT: movw $-9, %cx
-; AVX512BW-NEXT: kmovd %ecx, %k1
-; AVX512BW-NEXT: kandw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $12, %k1, %k1
-; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
-; AVX512BW-NEXT: popq %rbx
+; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: retq
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
More information about the llvm-commits
mailing list