[llvm] cd62289 - [DAG] isKnownNeverZero - add ISD::ADD/SUB DemandedElts handling and tests (#183958)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 15:03:20 PST 2026
Author: Weiwen He
Date: 2026-03-04T23:03:14Z
New Revision: cd6228983142ef5bf8bf7b5ca45ada749ff91cb0
URL: https://github.com/llvm/llvm-project/commit/cd6228983142ef5bf8bf7b5ca45ada749ff91cb0
DIFF: https://github.com/llvm/llvm-project/commit/cd6228983142ef5bf8bf7b5ca45ada749ff91cb0.diff
LOG: [DAG] isKnownNeverZero - add ISD::ADD/SUB DemandedElts handling and tests (#183958)
Closes #183043
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/known-never-zero.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b831d7cc759cf..f104e6724cdf9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6345,19 +6345,19 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, const APInt &DemandedElts,
case ISD::ADD:
if (Op->getFlags().hasNoUnsignedWrap())
- if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
- isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ if (isKnownNeverZero(Op.getOperand(1), DemandedElts, Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1))
return true;
// TODO: There are a lot more cases we can prove for add.
break;
case ISD::SUB: {
if (isNullConstant(Op.getOperand(0)))
- return isKnownNeverZero(Op.getOperand(1), Depth + 1);
+ return isKnownNeverZero(Op.getOperand(1), DemandedElts, Depth + 1);
- std::optional<bool> ne =
- KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1),
- computeKnownBits(Op.getOperand(1), Depth + 1));
+ std::optional<bool> ne = KnownBits::ne(
+ computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1),
+ computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1));
return ne && *ne;
}
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 4437266e3f9ef..bbc1d9477c6a9 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1759,6 +1759,30 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
ret i32 %r
}
+define i32 @add_nuw_known_nonzero_vec(<4 x i32> %xx, ptr %p) {
+; X86-LABEL: add_nuw_known_nonzero_vec:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: movdqa %xmm0, (%eax)
+; X86-NEXT: movd %xmm0, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: add_nuw_known_nonzero_vec:
+; X64: # %bb.0:
+; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vmovdqa %xmm0, (%rdi)
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+ %z = add nuw <4 x i32> %xx, <i32 1, i32 0, i32 0, i32 0>
+ store <4 x i32> %z, ptr %p
+ %e = extractelement <4 x i32> %z, i32 0
+ %r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
+ ret i32 %r
+}
+
define i32 @sub_known_nonzero_neg_case(i32 %xx) {
; X86-LABEL: sub_known_nonzero_neg_case:
; X86: # %bb.0:
@@ -1857,6 +1881,35 @@ define i32 @sub_maybe_zero2(i32 %x) {
ret i32 %r
}
+define i32 @sub_known_nonzero_ne_vec(<4 x i32> %xx, ptr %p) {
+; X86-LABEL: sub_known_nonzero_ne_vec:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: movd {{.*#+}} xmm1 = [2,0,0,0]
+; X86-NEXT: psubd %xmm0, %xmm1
+; X86-NEXT: movdqa %xmm1, (%eax)
+; X86-NEXT: movd %xmm1, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sub_known_nonzero_ne_vec:
+; X64: # %bb.0:
+; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,0]
+; X64-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; X64-NEXT: vmovdqa %xmm0, (%rdi)
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+ %u = or <4 x i32> %xx, <i32 1, i32 0, i32 0, i32 0>
+ %z = sub <4 x i32> <i32 2, i32 0, i32 0, i32 0>, %u
+ store <4 x i32> %z, ptr %p
+ %e = extractelement <4 x i32> %z, i32 0
+ %r = call i32 @llvm.cttz.i32(i32 %e, i1 false)
+ ret i32 %r
+}
+
define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nsw:
; X86: # %bb.0:
More information about the llvm-commits mailing list