[llvm] [SelectionDAG]: Add more cases for UDIV, SDIV, SRA, and SRL (PR #89522)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 20 21:06:25 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/89522
From e79a8fff2dd9cb6b53e49a5248fea043cc13db60 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 20 Apr 2024 20:32:38 -0400
Subject: [PATCH 1/2] [SelectionDAG] Pre-commit tests (NFC)
---
llvm/test/CodeGen/X86/known-never-zero.ll | 231 +++++++++++++++++-----
1 file changed, 183 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 39d02f9112f4fc..df6b9418adfdde 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1102,12 +1102,14 @@ define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
ret i32 %r
}
-define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
-; X86-LABEL: udiv_maybe_zero:
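+; The dividend (x | y) is always u>= the divisor y, so the quotient is at
+; least 1 whenever the division is defined.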
+define i32 @udiv_known_nonzero_2(i32 %xx, i32 %y) {
+; X86-LABEL: udiv_known_nonzero_2:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: divl %ecx
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB37_1
; X86-NEXT: # %bb.2: # %cond.false
@@ -1117,9 +1119,10 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
-; X64-LABEL: udiv_maybe_zero:
+; X64-LABEL: udiv_known_nonzero_2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: testl %eax, %eax
@@ -1129,6 +1132,40 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: retq
; X64-NEXT: .LBB37_1:
; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+ %x = or i32 %xx, %y
+ %z = udiv i32 %x, %y
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
+define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
+; X86-LABEL: udiv_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB38_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB38_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: udiv_maybe_zero:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %esi
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB38_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB38_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1159,6 +1196,104 @@ define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
ret i32 %r
}
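+; Mixed-sign sdiv: abs(xx) is non-negative while -abs(y) is non-positive, so
+; the quotient may still truncate to zero.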
+define i32 @sdiv_known_nonzero_2(i32 %xx, i32 %y) {
+; X86-LABEL: sdiv_known_nonzero_2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: negl %esi
+; X86-NEXT: cmovnsl %ecx, %esi
+; X86-NEXT: cltd
+; X86-NEXT: idivl %esi
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB40_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+; X86-NEXT: .LBB40_1:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sdiv_known_nonzero_2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %edi, %eax
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: cmovnsl %esi, %ecx
+; X64-NEXT: cltd
+; X64-NEXT: idivl %ecx
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB40_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB40_1:
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.abs.i32(i32 %xx, i1 false)
+ %yy = call i32 @llvm.abs.i32(i32 %y, i1 false)
+ %yyneg = sub nsw i32 0, %yy
+ %z = sdiv i32 %x, %yyneg
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
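+; x = abs(xx) is at most 2^31, so x + 1 cannot wrap and the dividend is u> the
+; divisor.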
+define i32 @udiv_known_nonzero_uge(i32 %xx) {
+; X86-LABEL: udiv_known_nonzero_uge:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: leal 1(%ecx), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB41_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB41_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: udiv_known_nonzero_uge:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: cmovsl %edi, %ecx
+; X64-NEXT: leal 1(%rcx), %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB41_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB41_1:
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.abs.i32(i32 %xx, i1 false)
+ %yy = add i32 %x, 1
+ %z = udiv i32 %yy, %x
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sdiv_maybe_zero:
; X86: # %bb.0:
@@ -1166,11 +1301,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB39_1
+; X86-NEXT: je .LBB42_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB39_1:
+; X86-NEXT: .LBB42_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1180,11 +1315,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB39_1
+; X64-NEXT: je .LBB42_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB39_1:
+; X64-NEXT: .LBB42_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = sdiv exact i32 %x, %y
@@ -1219,11 +1354,11 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: je .LBB41_1
+; X86-NEXT: je .LBB44_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB41_1:
+; X86-NEXT: .LBB44_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1231,11 +1366,11 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
-; X64-NEXT: je .LBB41_1
+; X64-NEXT: je .LBB44_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB41_1:
+; X64-NEXT: .LBB44_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%x = or i32 %xx, 1
@@ -1302,11 +1437,11 @@ define i32 @sub_maybe_zero(i32 %x) {
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: je .LBB44_1
+; X86-NEXT: je .LBB47_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB44_1:
+; X86-NEXT: .LBB47_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1315,11 +1450,11 @@ define i32 @sub_maybe_zero(i32 %x) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: subl %edi, %eax
-; X64-NEXT: je .LBB44_1
+; X64-NEXT: je .LBB47_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB44_1:
+; X64-NEXT: .LBB47_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%y = or i32 %x, 64
@@ -1333,22 +1468,22 @@ define i32 @sub_maybe_zero2(i32 %x) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: negl %eax
-; X86-NEXT: je .LBB45_1
+; X86-NEXT: je .LBB48_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB45_1:
+; X86-NEXT: .LBB48_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
; X64-NEXT: negl %edi
-; X64-NEXT: je .LBB45_1
+; X64-NEXT: je .LBB48_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB45_1:
+; X64-NEXT: .LBB48_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = sub i32 0, %x
@@ -1363,11 +1498,11 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB46_1
+; X86-NEXT: je .LBB49_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB46_1:
+; X86-NEXT: .LBB49_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1376,11 +1511,11 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB46_1
+; X64-NEXT: je .LBB49_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB46_1:
+; X64-NEXT: .LBB49_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
@@ -1396,11 +1531,11 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB47_1
+; X86-NEXT: je .LBB50_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB47_1:
+; X86-NEXT: .LBB50_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1409,11 +1544,11 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB47_1
+; X64-NEXT: je .LBB50_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB47_1:
+; X64-NEXT: .LBB50_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
@@ -1428,11 +1563,11 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB48_1
+; X86-NEXT: je .LBB51_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB48_1:
+; X86-NEXT: .LBB51_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1440,11 +1575,11 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: imull %esi, %edi
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB48_1
+; X64-NEXT: je .LBB51_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB48_1:
+; X64-NEXT: .LBB51_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = mul nuw nsw i32 %y, %x
@@ -1491,11 +1626,11 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X86: # %bb.0:
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB50_1
+; X86-NEXT: je .LBB53_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB50_1:
+; X86-NEXT: .LBB53_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1503,11 +1638,11 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB50_1
+; X64-NEXT: je .LBB53_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB50_1:
+; X64-NEXT: .LBB53_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = bitcast <2 x i16> %x to i32
@@ -1521,11 +1656,11 @@ define i32 @bitcast_from_float(float %x) {
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB51_1
+; X86-NEXT: je .LBB54_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB51_1:
+; X86-NEXT: .LBB54_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
@@ -1533,11 +1668,11 @@ define i32 @bitcast_from_float(float %x) {
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB51_1
+; X64-NEXT: je .LBB54_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB51_1:
+; X64-NEXT: .LBB54_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = bitcast float %x to i32
@@ -1575,24 +1710,24 @@ define i32 @zext_maybe_zero(i16 %x) {
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testw %ax, %ax
-; X86-NEXT: je .LBB53_1
+; X86-NEXT: je .LBB56_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB53_1:
+; X86-NEXT: .LBB56_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB53_1
+; X64-NEXT: je .LBB56_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB53_1:
+; X64-NEXT: .LBB56_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = zext i16 %x to i32
@@ -1630,23 +1765,23 @@ define i32 @sext_maybe_zero(i16 %x) {
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB55_1
+; X86-NEXT: je .LBB58_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB55_1:
+; X86-NEXT: .LBB58_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB55_1
+; X64-NEXT: je .LBB58_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movswl %di, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB55_1:
+; X64-NEXT: .LBB58_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
%z = sext i16 %x to i32
From a933f60c1407d53061d5e684390142318e8398c2 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 20 Apr 2024 19:48:22 -0400
Subject: [PATCH 2/2] [SelectionDAG]: Add more cases for UDIV, SDIV, SRA, and
SRL
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 48 +++++++++++++++++--
llvm/lib/Support/KnownBits.cpp | 3 ++
llvm/test/CodeGen/X86/known-pow2.ll | 29 +++++------
3 files changed, 60 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7dbf83b7adeef0..be9af7b58b9480 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5443,19 +5443,59 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (ValKnown.isNegative())
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
- APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ const KnownBits Shift = computeKnownBits(Op.getOperand(1), Depth + 1);
+ APInt MaxCnt = Shift.getMaxValue();
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
!ValKnown.One.lshr(MaxCnt).isZero())
return true;
+ // As in the udiv case below: if the value is known to be u>= (1 << amt),
+ // shifting it right by amt leaves a result of at least 1.
+ const KnownBits One =
+ KnownBits::makeConstant(APInt(ValKnown.getBitWidth(), 1));
+
+ std::optional<bool> uge =
+ KnownBits::uge(ValKnown, KnownBits::shl(One, Shift));
+ if (uge && *uge)
+ return true;
break;
}
- case ISD::UDIV:
- case ISD::SDIV:
+ case ISD::UDIV: {
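+ // An exact udiv can produce zero only if the dividend is zero.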
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ // If Op0 u>= Op1, the quotient is at least 1 and therefore non-zero.
+ std::optional<bool> uge = KnownBits::uge(Op0, Op1);
+ if (uge && *uge)
+ return true;
+ break;
+ }
+ case ISD::SDIV: {
// div exact can only produce a zero if the dividend is zero.
- // TODO: For udiv this is also true if Op1 u<= Op0
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ // With mixed signs, sdiv truncates toward zero and yields 0 whenever
+ // |Op0| < |Op1|, so known signs alone cannot prove a non-zero quotient.
+
+ // When both operands are negative, the comparison is reversed: Op0 s<= Op1
+ // means |Op0| >= |Op1|, so the quotient is at least 1.
+ if (Op0.isNegative() && Op1.isNegative()) {
+ std::optional<bool> sle = KnownBits::sle(Op0, Op1);
+ if (sle && *sle)
+ return true;
+ }
+
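+ // If both operands are strictly positive, this reduces to the unsigned
+ // case.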
+ if (Op0.isStrictlyPositive() && Op1.isStrictlyPositive()) {
+ std::optional<bool> uge = KnownBits::uge(Op0, Op1);
+ if (uge && *uge)
+ return true;
+ }
break;
+ }
case ISD::ADD:
if (Op->getFlags().hasNoUnsignedWrap())
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index fe47884f3e55ac..a82310ef7573a6 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -1009,6 +1009,9 @@ KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS,
Known.Zero.setHighBits(LeadZ);
Known = divComputeLowBit(Known, LHS, RHS, Exact);
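+ // If LHS u>= RHS, the quotient is at least 1; raise the known lower bound.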
+ std::optional<bool> uge = KnownBits::uge(LHS, RHS);
+ if (uge && *uge)
+ Known.makeGE(APInt(BitWidth, 1));
assert(!Known.hasConflict() && "Bad Output");
return Known;
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index e183bbc15617d5..31b4731613def9 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -118,12 +118,13 @@ define i1 @pow2_srl_fail0(i32 %x, i32 %y) {
; CHECK-LABEL: pow2_srl_fail0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $30, %cl
-; CHECK-NEXT: notl %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shll %cl, %edi
-; CHECK-NEXT: testl $1048576, %edi # imm = 0x100000
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: shll %cl, %eax
+; CHECK-NEXT: shrl $20, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%yy = and i32 %y, 30
%d = lshr i32 1048576, %yy
@@ -349,9 +350,8 @@ define i1 @pow2_umax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmoval %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -482,9 +482,8 @@ define i1 @pow2_smax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmovgl %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -555,9 +554,8 @@ define i1 @pow2_select_fail0(i1 %c, i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %r8d
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: cmovnel %edx, %r8d
-; CHECK-NEXT: notl %esi
-; CHECK-NEXT: testl %esi, %r8d
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %r8d, %esi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -696,10 +694,9 @@ define <4 x i1> @pow2_vselect_fail0_ne(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y,
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: pandn %xmm7, %xmm0
; CHECK-NEXT: por %xmm2, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%zz = lshr <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, %z