[llvm] [SelectionDAG]: Add more cases for UDIV, SDIV, SRA, and SRL (PR #89522)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 9 13:20:13 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/89522
>From f11a72617199051f0dc6d198de26722516589b57 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 20 Apr 2024 20:32:38 -0400
Subject: [PATCH 1/2] [SelectionDAG] Pre-commit tests (NFC)
---
llvm/test/CodeGen/X86/known-never-zero.ll | 231 +++++++++++++++++-----
1 file changed, 183 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index f0504e7dbdb65..2ef6bdc8eda94 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1108,12 +1108,14 @@ define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
ret i32 %r
}
-define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
-; X86-LABEL: udiv_maybe_zero:
+define i32 @udiv_known_nonzero_2(i32 %xx, i32 %y) {
+; X86-LABEL: udiv_known_nonzero_2:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: divl %ecx
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB37_1
; X86-NEXT: # %bb.2: # %cond.false
@@ -1123,9 +1125,10 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
-; X64-LABEL: udiv_maybe_zero:
+; X64-LABEL: udiv_known_nonzero_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: testl %eax, %eax
@@ -1135,6 +1138,40 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: retq
; X64-NEXT: .LBB37_1:
; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+ %x = or i32 %xx, %y
+ %z = udiv i32 %x, %y
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
+define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
+; X86-LABEL: udiv_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB38_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB38_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: udiv_maybe_zero:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %esi
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB38_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB38_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1165,6 +1202,104 @@ define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
ret i32 %r
}
+define i32 @sdiv_known_nonzero_2(i32 %xx, i32 %y) {
+; X86-LABEL: sdiv_known_nonzero_2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: negl %esi
+; X86-NEXT: cmovnsl %ecx, %esi
+; X86-NEXT: cltd
+; X86-NEXT: idivl %esi
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB40_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+; X86-NEXT: .LBB40_1:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sdiv_known_nonzero_2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %edi, %eax
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: cmovnsl %esi, %ecx
+; X64-NEXT: cltd
+; X64-NEXT: idivl %ecx
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB40_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB40_1:
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.abs.i32(i32 %xx, i1 false)
+ %yy = call i32 @llvm.abs.i32(i32 %y, i1 false)
+ %yyneg = sub nsw i32 0, %yy
+ %z = sdiv i32 %x, %yyneg
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
+define i32 @udiv_known_nonzero_uge(i32 %xx) {
+; X86-LABEL: udiv_known_nonzero_uge:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: leal 1(%ecx), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB41_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB41_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: udiv_known_nonzero_uge:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: cmovsl %edi, %ecx
+; X64-NEXT: leal 1(%rcx), %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB41_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB41_1:
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.abs.i32(i32 %xx, i1 false)
+ %yy = add i32 %x, 1
+ %z = udiv i32 %yy, %x
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sdiv_maybe_zero:
; X86: # %bb.0:
@@ -1172,11 +1307,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB39_1
+; X86-NEXT: je .LBB42_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB39_1:
+; X86-NEXT: .LBB42_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1186,11 +1321,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB39_1
+; X64-NEXT: je .LBB42_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB39_1:
+; X64-NEXT: .LBB42_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = sdiv exact i32 %x, %y
@@ -1225,11 +1360,11 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: je .LBB41_1
+; X86-NEXT: je .LBB44_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB41_1:
+; X86-NEXT: .LBB44_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1237,11 +1372,11 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
-; X64-NEXT: je .LBB41_1
+; X64-NEXT: je .LBB44_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB41_1:
+; X64-NEXT: .LBB44_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%x = or i32 %xx, 1
@@ -1308,11 +1443,11 @@ define i32 @sub_maybe_zero(i32 %x) {
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: je .LBB44_1
+; X86-NEXT: je .LBB47_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB44_1:
+; X86-NEXT: .LBB47_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1321,11 +1456,11 @@ define i32 @sub_maybe_zero(i32 %x) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: subl %edi, %eax
-; X64-NEXT: je .LBB44_1
+; X64-NEXT: je .LBB47_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB44_1:
+; X64-NEXT: .LBB47_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%y = or i32 %x, 64
@@ -1339,22 +1474,22 @@ define i32 @sub_maybe_zero2(i32 %x) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: negl %eax
-; X86-NEXT: je .LBB45_1
+; X86-NEXT: je .LBB48_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB45_1:
+; X86-NEXT: .LBB48_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
; X64-NEXT: negl %edi
-; X64-NEXT: je .LBB45_1
+; X64-NEXT: je .LBB48_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB45_1:
+; X64-NEXT: .LBB48_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = sub i32 0, %x
@@ -1369,11 +1504,11 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB46_1
+; X86-NEXT: je .LBB49_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB46_1:
+; X86-NEXT: .LBB49_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1382,11 +1517,11 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB46_1
+; X64-NEXT: je .LBB49_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB46_1:
+; X64-NEXT: .LBB49_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
@@ -1402,11 +1537,11 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB47_1
+; X86-NEXT: je .LBB50_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB47_1:
+; X86-NEXT: .LBB50_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1415,11 +1550,11 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB47_1
+; X64-NEXT: je .LBB50_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB47_1:
+; X64-NEXT: .LBB50_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
@@ -1434,11 +1569,11 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB48_1
+; X86-NEXT: je .LBB51_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB48_1:
+; X86-NEXT: .LBB51_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1446,11 +1581,11 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: imull %esi, %edi
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB48_1
+; X64-NEXT: je .LBB51_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB48_1:
+; X64-NEXT: .LBB51_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = mul nuw nsw i32 %y, %x
@@ -1497,11 +1632,11 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X86: # %bb.0:
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB50_1
+; X86-NEXT: je .LBB53_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB50_1:
+; X86-NEXT: .LBB53_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1509,11 +1644,11 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB50_1
+; X64-NEXT: je .LBB53_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB50_1:
+; X64-NEXT: .LBB53_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = bitcast <2 x i16> %x to i32
@@ -1527,11 +1662,11 @@ define i32 @bitcast_from_float(float %x) {
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB51_1
+; X86-NEXT: je .LBB54_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB51_1:
+; X86-NEXT: .LBB54_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1539,11 +1674,11 @@ define i32 @bitcast_from_float(float %x) {
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB51_1
+; X64-NEXT: je .LBB54_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB51_1:
+; X64-NEXT: .LBB54_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = bitcast float %x to i32
@@ -1581,24 +1716,24 @@ define i32 @zext_maybe_zero(i16 %x) {
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testw %ax, %ax
-; X86-NEXT: je .LBB53_1
+; X86-NEXT: je .LBB56_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB53_1:
+; X86-NEXT: .LBB56_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB53_1
+; X64-NEXT: je .LBB56_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB53_1:
+; X64-NEXT: .LBB56_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = zext i16 %x to i32
@@ -1636,23 +1771,23 @@ define i32 @sext_maybe_zero(i16 %x) {
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB55_1
+; X86-NEXT: je .LBB58_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB55_1:
+; X86-NEXT: .LBB58_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB55_1
+; X64-NEXT: je .LBB58_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movswl %di, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB55_1:
+; X64-NEXT: .LBB58_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = sext i16 %x to i32
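A quick standalone sanity check of the arithmetic the new tests lean on (an illustrative sketch, not part of the patch): or-ing the divisor into the dividend forces the dividend to be unsigned-greater-or-equal to the divisor, which is what makes the quotient provably non-zero, while a mixed-sign sdiv can still round to zero.

#include <cassert>
#include <cstdint>

int main() {
  // udiv_known_nonzero_2: x = xx | y guarantees x u>= y, so for any
  // non-zero y the unsigned quotient is at least 1.
  for (uint32_t xx : {0u, 1u, 7u, 0x80000000u})
    for (uint32_t y : {1u, 3u, 0xffffffffu}) {
      uint32_t x = xx | y;
      assert(x >= y);
      assert(x / y >= 1);
    }

  // sdiv rounds toward zero, so opposite signs alone are not enough to
  // prove a non-zero quotient: -1 / 5 and 1 / -5 are both 0.
  assert(-1 / 5 == 0);
  assert(1 / -5 == 0);
  return 0;
}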
>From 360ed74c48d24c8a2d08c761b2bc7050bdbd4a3a Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 20 Apr 2024 19:48:22 -0400
Subject: [PATCH 2/2] [SelectionDAG]: Add more cases for UDIV, SDIV, SRA, and
SRL
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 103 +++++++++++++++++-
llvm/lib/Support/KnownBits.cpp | 6 +
2 files changed, 103 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 523d3aea66225..aeb4f09ef5450 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5580,27 +5580,98 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (ValKnown.isNegative())
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
- APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ const KnownBits Shift = computeKnownBits(Op.getOperand(1), Depth + 1);
+ APInt MaxCnt = Shift.getMaxValue();
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
!ValKnown.One.lshr(MaxCnt).isZero())
return true;
+ // Similar to the udiv case below: shifting right by Shift divides by
+ // (1 << Shift), so the result is known non-zero when the value is known
+ // to be u>= (1 << Shift).
+ const KnownBits One =
+ KnownBits::makeConstant(APInt(ValKnown.getBitWidth(), 1));
+
+ std::optional<bool> uge =
+ KnownBits::uge(ValKnown, KnownBits::shl(One, Shift));
+ if (uge && *uge)
+ return true;
break;
}
- case ISD::UDIV:
- case ISD::SDIV:
+ case ISD::UDIV: {
+ // An exact udiv can only produce zero if the dividend is zero.
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ // The quotient is non-zero if Op0 u>= Op1.
+ std::optional<bool> uge = KnownBits::uge(Op0, Op1);
+ if (uge && *uge)
+ return true;
+ break;
+ }
+ case ISD::SDIV: {
// div exact can only produce a zero if the dividend is zero.
- // TODO: For udiv this is also true if Op1 u<= Op0
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ // With mixed signs the quotient rounds toward zero (e.g. -1 / 5 == 0),
+ // so opposite signs alone are not enough: the dividend's magnitude must
+ // also be at least the divisor's.
+ if (Op0.isNegative() && Op1.isStrictlyPositive() &&
+ (-Op0.getSignedMaxValue()).uge(Op1.getSignedMaxValue()))
+ return true;
+
+ if (Op0.isStrictlyPositive() && Op1.isNegative() &&
+ Op0.getSignedMinValue().uge(-Op1.getSignedMinValue()))
+ return true;
+
+ // If both are negative, the magnitude comparison is reversed: the
+ // quotient is non-zero when Op0 s<= Op1, i.e. |Op0| >= |Op1|.
+ if (Op0.isNegative() && Op1.isNegative()) {
+ std::optional<bool> sle = KnownBits::sle(Op0, Op1);
+ if (sle && *sle)
+ return true;
+ }
+
+ if (Op0.isStrictlyPositive() && Op1.isStrictlyPositive()) {
+ std::optional<bool> uge = KnownBits::uge(Op0, Op1);
+ if (uge && *uge)
+ return true;
+ }
break;
+ }
- case ISD::ADD:
+ case ISD::ADD: {
if (Op->getFlags().hasNoUnsignedWrap())
if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
isKnownNeverZero(Op.getOperand(0), Depth + 1))
return true;
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (Op0.isNonNegative() && Op1.isNonNegative())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (Op0.isNegative() && Op1.isNegative()) {
+ APInt Mask = APInt::getSignedMaxValue(Op0.getBitWidth());
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ if (Op0.One.intersects(Mask))
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ if (Op1.One.intersects(Mask))
+ return true;
+ }
+
+ if (KnownBits::computeForAddSub(
+ /*Add=*/true, Op->getFlags().hasNoSignedWrap(),
+ Op->getFlags().hasNoUnsignedWrap(), Op0, Op1)
+ .isNonZero())
+ return true;
+
// TODO: There are a lot more cases we can prove for add.
break;
+ }
case ISD::SUB: {
if (isNullConstant(Op.getOperand(0)))
@@ -5612,12 +5683,32 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return ne && *ne;
}
- case ISD::MUL:
+ case ISD::MUL: {
if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
if (isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(0), Depth + 1))
return true;
+
+ KnownBits XKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (XKnown.One[0])
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1))
+ return true;
+
+ KnownBits YKnown = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (YKnown.One[0])
+ if (XKnown.isNonZero() || isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+
+ // If there exist subsets of X (sX) and of Y (sY) s.t. sX * sY is
+ // non-zero, then X * Y is non-zero. We can take sX and sY to be the
+ // lowest known one bit of X and Y respectively; their product is
+ // non-zero exactly when
+ // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
+ if (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros() <
+ XKnown.getBitWidth())
+ return true;
break;
+ }
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
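The new cases above are driven by the KnownBits comparison helpers, which return std::optional<bool>: a definite answer when the comparison is decidable from the known bits, and std::nullopt otherwise. A minimal usage sketch (assumes an LLVM development tree; the constants are made up for illustration):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
#include <optional>

using namespace llvm;

int main() {
  // An 8-bit value whose bit 3 is known one: every possible value is >= 8.
  KnownBits LHS(8);
  LHS.One.setBit(3);

  // A divisor known to be exactly 5.
  KnownBits RHS = KnownBits::makeConstant(APInt(8, 5));

  // Decidable: min(LHS) = 8 is u>= max(RHS) = 5, so uge returns true and
  // isKnownNeverZero may conclude that LHS udiv RHS is at least 1.
  std::optional<bool> UGE = KnownBits::uge(LHS, RHS);
  assert(UGE && *UGE);

  // Undecidable: with nothing known about the dividend, uge returns
  // std::nullopt and the new checks conservatively prove nothing.
  KnownBits Unknown(8);
  assert(!KnownBits::uge(Unknown, RHS).has_value());
  return 0;
}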
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index e3ad1468874cd..2510ab834b76e 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -969,6 +969,9 @@ KnownBits KnownBits::sdiv(const KnownBits &LHS, const KnownBits &RHS,
Res = (Num.isMinSignedValue() && Denom.isAllOnes())
? APInt::getSignedMaxValue(BitWidth)
: Num.sdiv(Denom);
+ // Both operands are negative here, so LHS s<= RHS means
+ // |LHS| >= |RHS| and the quotient is at least 1.
+ std::optional<bool> sle = KnownBits::sle(LHS, RHS);
+ if (sle && *sle)
+ Known.makeGE(APInt(BitWidth, 1));
} else if (LHS.isNegative() && RHS.isNonNegative()) {
// Result is negative if Exact OR -LHS u>= RHS.
if (Exact || (-LHS.getSignedMaxValue()).uge(RHS.getSignedMaxValue())) {
@@ -1022,6 +1025,9 @@ KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS,
Known.Zero.setHighBits(LeadZ);
Known = divComputeLowBit(Known, LHS, RHS, Exact);
+ // If the dividend is known to be u>= the divisor, the quotient is
+ // at least 1.
+ std::optional<bool> uge = KnownBits::uge(LHS, RHS);
+ if (uge && *uge)
+ Known.makeGE(APInt(BitWidth, 1));
return Known;
}
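Both KnownBits refinements encode the same fact: once the dividend is known to be at least the divisor (u>= for udiv, or s<= when both operands are negative, since the magnitude order flips), the quotient is at least 1, so makeGE(APInt(BitWidth, 1)) marks it non-zero. A brute-force arithmetic check of that claim (again just a sketch, not the LLVM code itself):

#include <cassert>
#include <cstdint>

int main() {
  // udiv: LHS u>= RHS with RHS != 0 implies LHS / RHS >= 1.
  for (uint32_t L = 1; L < 64; ++L)
    for (uint32_t R = 1; R <= L; ++R)
      assert(L / R >= 1);

  // sdiv, both negative: LHS s<= RHS implies |LHS| >= |RHS|, so the
  // (positive) quotient is again at least 1.
  for (int32_t L = -64; L < 0; ++L)
    for (int32_t R = L; R < 0; ++R)
      assert(L / R >= 1);
  return 0;
}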