[llvm] a4c461c - [SelectionDAG] Fill in some more cases in `isKnownNeverZero`
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 12 15:18:17 PDT 2023
Author: Noah Goldstein
Date: 2023-07-12T17:17:53-05:00
New Revision: a4c461c063a2ef229d577d050fef3cff7284fc4b
URL: https://github.com/llvm/llvm-project/commit/a4c461c063a2ef229d577d050fef3cff7284fc4b
DIFF: https://github.com/llvm/llvm-project/commit/a4c461c063a2ef229d577d050fef3cff7284fc4b.diff
LOG: [SelectionDAG] Fill in some more cases in `isKnownNeverZero`
This mostly copies cases that already exist in ValueTracking, although
it skips the more complex ones. Those can be filled in as needed.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D149199
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/divrem-by-select.ll
llvm/test/CodeGen/X86/known-never-zero.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 39375369fac19a..fa7304188a9a82 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5096,14 +5096,95 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
[](ConstantSDNode *C) { return !C->isZero(); }))
return true;
- // TODO: Recognize more cases here.
+ // TODO: Recognize more cases here. Most of the cases are also incomplete to
+ // some degree.
switch (Op.getOpcode()) {
- default: break;
+ default:
+ break;
+
case ISD::OR:
- if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
- isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::VSELECT:
+ case ISD::SELECT:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(2), Depth + 1);
+
+ case ISD::SHL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ // Odd << X (e.g. 1 << X) is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+ return true;
+ break;
+
+ case ISD::UADDSAT:
+ case ISD::UMAX:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::UMIN:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::ABS:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // Negative >> X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really
+ // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
return true;
break;
+
+ case ISD::UDIV:
+ case ISD::SDIV:
+ // div exact can only produce a zero if the dividend is zero.
+ // TODO: For udiv this is also true if Op1 u<= Op0
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ break;
+
+ case ISD::ADD:
+ if (Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ // TODO: There are a lot more cases we can prove for add.
+ break;
+
+ case ISD::SUB: {
+ if (isNullConstant(Op.getOperand(0)))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1);
+
+ std::optional<bool> ne =
+ KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1),
+ computeKnownBits(Op.getOperand(1), Depth + 1));
+ return ne && *ne;
+ }
+
+ case ISD::MUL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ break;
+
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
}
return computeKnownBits(Op, Depth).isNonZero();
diff --git a/llvm/test/CodeGen/X86/divrem-by-select.ll b/llvm/test/CodeGen/X86/divrem-by-select.ll
index 16dea9a380c1f7..c61c9bb1985140 100644
--- a/llvm/test/CodeGen/X86/divrem-by-select.ll
+++ b/llvm/test/CodeGen/X86/divrem-by-select.ll
@@ -69,20 +69,16 @@ define <2 x i64> @udiv_identity_const_todo_getter_nonzero(<2 x i1> %c, <2 x i64>
; CHECK-X64-V4: # %bb.0:
; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1
-; CHECK-X64-V4-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1]
-; CHECK-X64-V4-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
-; CHECK-X64-V4-NEXT: vpextrq $1, %xmm0, %rcx
-; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rax
-; CHECK-X64-V4-NEXT: xorl %edx, %edx
-; CHECK-X64-V4-NEXT: divq %rcx
-; CHECK-X64-V4-NEXT: movq %rax, %rcx
-; CHECK-X64-V4-NEXT: vmovq %xmm0, %rsi
-; CHECK-X64-V4-NEXT: vmovq %xmm1, %rax
-; CHECK-X64-V4-NEXT: xorl %edx, %edx
-; CHECK-X64-V4-NEXT: divq %rsi
+; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rdx
+; CHECK-X64-V4-NEXT: movabsq $-3689348814741910323, %rax # imm = 0xCCCCCCCCCCCCCCCD
+; CHECK-X64-V4-NEXT: mulxq %rax, %rcx, %rcx
; CHECK-X64-V4-NEXT: vmovq %rcx, %xmm0
-; CHECK-X64-V4-NEXT: vmovq %rax, %xmm1
-; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-X64-V4-NEXT: vmovq %xmm1, %rdx
+; CHECK-X64-V4-NEXT: mulxq %rax, %rax, %rax
+; CHECK-X64-V4-NEXT: vmovq %rax, %xmm2
+; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; CHECK-X64-V4-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1}
+; CHECK-X64-V4-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-X64-V4-NEXT: retq
;; Fails at the moment because `10` is even so there is no common
@@ -118,23 +114,23 @@ define <2 x i64> @udiv_indentity_non_zero(<2 x i1> %c, <2 x i64> %x, <2 x i64> %
;
; CHECK-X64-V4-LABEL: udiv_indentity_non_zero:
; CHECK-X64-V4: # %bb.0:
-; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1
-; CHECK-X64-V4-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; CHECK-X64-V4-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1]
-; CHECK-X64-V4-NEXT: vpsubq %xmm0, %xmm2, %xmm3 {%k1}
-; CHECK-X64-V4-NEXT: vpextrq $1, %xmm3, %rcx
+; CHECK-X64-V4-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; CHECK-X64-V4-NEXT: vpsubq %xmm3, %xmm2, %xmm2
+; CHECK-X64-V4-NEXT: vpextrq $1, %xmm2, %rcx
; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-X64-V4-NEXT: xorl %edx, %edx
; CHECK-X64-V4-NEXT: divq %rcx
; CHECK-X64-V4-NEXT: movq %rax, %rcx
-; CHECK-X64-V4-NEXT: vmovq %xmm3, %rsi
+; CHECK-X64-V4-NEXT: vmovq %xmm2, %rsi
; CHECK-X64-V4-NEXT: vmovq %xmm1, %rax
; CHECK-X64-V4-NEXT: xorl %edx, %edx
; CHECK-X64-V4-NEXT: divq %rsi
+; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0
+; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1
; CHECK-X64-V4-NEXT: vmovq %rcx, %xmm0
-; CHECK-X64-V4-NEXT: vmovq %rax, %xmm1
-; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-X64-V4-NEXT: vmovq %rax, %xmm2
+; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm1 {%k1} = xmm2[0],xmm0[0]
+; CHECK-X64-V4-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-X64-V4-NEXT: retq
%non_zero = add nsw nuw <2 x i64> %y, <i64 1, i64 1>
%d = select <2 x i1> %c, <2 x i64> %non_zero, <2 x i64> <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 6b911d827885b0..b16f12cd846f02 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -49,9 +49,7 @@ define i32 @select_known_nonzero(i1 %c, i32 %x) {
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: movl $122, %eax
; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 122
@@ -87,9 +85,7 @@ define i32 @shl_known_nonzero_1s_bit_set(i32 %x) {
; CHECK-NEXT: movl $123, %eax
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%z = shl i32 123, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -103,9 +99,7 @@ define i32 @shl_known_nonzero_nsw(i32 %x, i32 %yy) {
; CHECK-NEXT: orl $256, %esi # imm = 0x100
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %esi
-; CHECK-NEXT: bsfl %esi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
%y = or i32 %yy, 256
%z = shl nsw i32 %y, %x
@@ -120,9 +114,7 @@ define i32 @shl_known_nonzero_nuw(i32 %x, i32 %yy) {
; CHECK-NEXT: orl $256, %esi # imm = 0x100
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %esi
-; CHECK-NEXT: bsfl %esi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
%y = or i32 %yy, 256
%z = shl nuw i32 %y, %x
@@ -155,9 +147,7 @@ define i32 @uaddsat_known_nonzero(i32 %x) {
; CHECK-NEXT: incl %edi
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovnel %edi, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -192,9 +182,7 @@ define i32 @umax_known_nonzero(i32 %x, i32 %y) {
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: cmpl %eax, %edi
; CHECK-NEXT: cmoval %edi, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%yy = shl nuw i32 4, %y
%z = call i32 @llvm.umax.i32(i32 %x, i32 %yy)
@@ -230,9 +218,7 @@ define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
; CHECK-NEXT: addl $4, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmovbl %eax, %esi
-; CHECK-NEXT: bsfl %esi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
@@ -313,9 +299,7 @@ define i32 @rotr_with_fshr_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: orl $256, %edi # imm = 0x100
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: rorl %cl, %edi
-; CHECK-NEXT: bsfl %edi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
%x = or i32 %xx, 256
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
@@ -395,9 +379,7 @@ define i32 @rotl_with_fshl_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: orl $256, %edi # imm = 0x100
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: roll %cl, %edi
-; CHECK-NEXT: bsfl %edi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
%x = or i32 %xx, 256
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
@@ -445,9 +427,7 @@ define i32 @sra_known_nonzero_exact(i32 %x, i32 %yy) {
; CHECK-NEXT: orl $256, %esi # imm = 0x100
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %esi
-; CHECK-NEXT: bsfl %esi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
%y = or i32 %yy, 256
%z = ashr exact i32 %y, %x
@@ -481,9 +461,7 @@ define i32 @srl_known_nonzero_sign_bit_set(i32 %x) {
; CHECK-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrl %cl, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%z = lshr i32 2147606891, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -497,9 +475,7 @@ define i32 @srl_known_nonzero_exact(i32 %x, i32 %yy) {
; CHECK-NEXT: orl $256, %esi # imm = 0x100
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrl %cl, %esi
-; CHECK-NEXT: bsfl %esi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
%y = or i32 %yy, 256
%z = lshr exact i32 %y, %x
@@ -533,9 +509,7 @@ define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: orl $64, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %esi
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%x = or i32 %xx, 64
%z = udiv exact i32 %x, %y
@@ -569,9 +543,7 @@ define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: orl $64, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %esi
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%x = or i32 %xx, 64
%z = sdiv exact i32 %x, %y
@@ -603,9 +575,7 @@ define i32 @add_known_nonzero(i32 %xx, i32 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: addl %esi, %edi
-; CHECK-NEXT: bsfl %edi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
%x = or i32 %xx, 1
%z = add nuw i32 %x, %y
@@ -639,9 +609,7 @@ define i32 @sub_known_nonzero_neg_case(i32 %xx) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: negl %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%x = shl nuw nsw i32 256, %xx
%z = sub i32 0, %x
@@ -656,9 +624,7 @@ define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
; CHECK-NEXT: orl $64, %eax
; CHECK-NEXT: andl $-65, %edi
; CHECK-NEXT: subl %eax, %edi
-; CHECK-NEXT: bsfl %edi, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
%x = or i32 %xx, 64
%y = and i32 %xx, -65
@@ -819,9 +785,7 @@ define i32 @zext_known_nonzero(i16 %xx) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: movzwl %ax, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%x = shl nuw nsw i16 256, %xx
%z = zext i16 %x to i32
@@ -854,9 +818,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: cwtl
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
%x = shl nuw nsw i16 256, %xx
%z = sext i16 %x to i32
More information about the llvm-commits mailing list