[llvm] Add more cases for UDIV, SDIV, SRA, and SRL (PR #89522)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 20 17:09:32 PDT 2024
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/89522
(No pull request description was provided.)
>From d38fba1c484409d7d5136bd0d2414d27dfc32c61 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 20 Apr 2024 19:48:22 -0400
Subject: [PATCH] Add more cases for UDIV, SDIV, SRA, and SRL
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 42 +++++++++++++++++--
llvm/test/CodeGen/X86/known-pow2.ll | 29 ++++++-------
2 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7dbf83b7adeef0..29a2b27db5f8df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5443,19 +5443,53 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (ValKnown.isNegative())
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
- APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ const KnownBits Shift = computeKnownBits(Op.getOperand(1), Depth + 1);
+ APInt MaxCnt = Shift.getMaxValue();
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
!ValKnown.One.lshr(MaxCnt).isZero())
return true;
+ // As with udiv, treat the shift as a division: test Val u>= (1 >> ShiftAmt).
+ const KnownBits One =
+ KnownBits::makeConstant(APInt(ValKnown.getBitWidth(), 1));
+ if (KnownBits::uge(ValKnown,
+ KnownBits::lshr(One, Shift, Shift.isNonZero())))
+ return true;
break;
}
- case ISD::UDIV:
- case ISD::SDIV:
+ case ISD::UDIV: {
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ // True if Op0 u>= Op1
+ if (KnownBits::uge(Op0, Op1))
+ return true;
+ if (!Op0.getMinValue().udiv(Op1.getMaxValue()).isZero())
+ return true;
+ break;
+ }
+ case ISD::SDIV: {
// div exact can only produce a zero if the dividend is zero.
- // TODO: For udiv this is also true if Op1 u<= Op0
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Op0.isNegative() && Op1.isStrictlyPositive())
+ return true;
+
+ if (Op0.isStrictlyPositive() && Op1.isNegative())
+ return true;
+
+ if (Op0.isNegative() && Op1.isNegative() && KnownBits::sge(Op0, Op1))
+ return true;
+
+ if (Op0.isStrictlyPositive() && Op1.isStrictlyPositive() &&
+ (KnownBits::uge(Op0, Op1) ||
+ !Op0.getMinValue().udiv(Op1.getMaxValue()).isZero()))
+ return true;
break;
+ }
case ISD::ADD:
if (Op->getFlags().hasNoUnsignedWrap())
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index e183bbc15617d5..31b4731613def9 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -118,12 +118,13 @@ define i1 @pow2_srl_fail0(i32 %x, i32 %y) {
; CHECK-LABEL: pow2_srl_fail0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $30, %cl
-; CHECK-NEXT: notl %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shll %cl, %edi
-; CHECK-NEXT: testl $1048576, %edi # imm = 0x100000
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: shll %cl, %eax
+; CHECK-NEXT: shrl $20, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%yy = and i32 %y, 30
%d = lshr i32 1048576, %yy
@@ -349,9 +350,8 @@ define i1 @pow2_umax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmoval %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -482,9 +482,8 @@ define i1 @pow2_smax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmovgl %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -555,9 +554,8 @@ define i1 @pow2_select_fail0(i1 %c, i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %r8d
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: cmovnel %edx, %r8d
-; CHECK-NEXT: notl %esi
-; CHECK-NEXT: testl %esi, %r8d
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %r8d, %esi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -696,10 +694,9 @@ define <4 x i1> @pow2_vselect_fail0_ne(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y,
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: pandn %xmm7, %xmm0
; CHECK-NEXT: por %xmm2, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%zz = lshr <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, %z
More information about the llvm-commits mailing list