[llvm] [DAG] isKnownNeverZero - add DemandedElts for ISD::SMIN/SMAX (PR #184054)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 01:57:24 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/184054
>From b3e0dace8c6b419504962b7c2d2e879188d483a7 Mon Sep 17 00:00:00 2001
From: Aaron Smull <a.p.smull at gmail.com>
Date: Sun, 1 Mar 2026 17:53:54 -0800
Subject: [PATCH] DemandedElts for Smin and Smax in isKnownNeverZero
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +++---
llvm/test/CodeGen/X86/known-never-zero.ll | 53 ++++++++++++++++++-
2 files changed, 60 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 084700c50bd06..7c0d1943b39a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6272,34 +6272,34 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, const APInt &DemandedElts,
// For smin/smax: If either operand is known negative/positive
// respectively we don't need the other to be known at all.
case ISD::SMAX: {
- KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
if (Op1.isStrictlyPositive())
return true;
- KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Op0.isStrictlyPositive())
return true;
if (Op1.isNonZero() && Op0.isNonZero())
return true;
- return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
- isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ return isKnownNeverZero(Op.getOperand(1), DemandedElts, Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
}
case ISD::SMIN: {
- KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
if (Op1.isNegative())
return true;
- KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Op0.isNegative())
return true;
if (Op1.isNonZero() && Op0.isNonZero())
return true;
- return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
- isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ return isKnownNeverZero(Op.getOperand(1), DemandedElts, Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
}
case ISD::UMIN:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 12bb486d8dceb..45a52c7924bd3 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
@@ -541,6 +541,32 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
ret i32 %r
}
+define i32 @smin_known_never_zero_vec_element(<4 x i32> %x) {
+; X86-LABEL: smin_known_never_zero_vec_element:
+; X86: # %bb.0:
+; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,4294967273,12,1]
+; X86-NEXT: movdqa %xmm1, %xmm2
+; X86-NEXT: pcmpgtd %xmm0, %xmm2
+; X86-NEXT: pand %xmm2, %xmm0
+; X86-NEXT: pandn %xmm1, %xmm2
+; X86-NEXT: por %xmm0, %xmm2
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
+; X86-NEXT: movd %xmm0, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: smin_known_never_zero_vec_element:
+; X64: # %bb.0:
+; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpextrd $1, %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+ %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 -23, i32 12, i32 1>)
+ %el = extractelement <4 x i32> %z, i32 1
+ %r = call i32 @llvm.cttz.i32(i32 %el, i1 false)
+ ret i32 %r
+}
+
define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smax_known_nonzero:
; X86: # %bb.0:
@@ -653,6 +679,31 @@ define i32 @smax_known_zero(i32 %x, i32 %y) {
ret i32 %r
}
+define i32 @smax_known_never_zero_vec_element(<4 x i32> %x) {
+; X86-LABEL: smax_known_never_zero_vec_element:
+; X86: # %bb.0:
+; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,4294967273,4294967284,4294967295]
+; X86-NEXT: movdqa %xmm0, %xmm2
+; X86-NEXT: pcmpgtd %xmm1, %xmm2
+; X86-NEXT: pand %xmm2, %xmm0
+; X86-NEXT: pandn %xmm1, %xmm2
+; X86-NEXT: por %xmm0, %xmm2
+; X86-NEXT: movd %xmm2, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: smax_known_never_zero_vec_element:
+; X64: # %bb.0:
+; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+ %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 -23, i32 -12, i32 -1>)
+ %el = extractelement <4 x i32> %z, i32 0
+ %r = call i32 @llvm.cttz.i32(i32 %el, i1 false)
+ ret i32 %r
+}
+
define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_known_nonzero:
; X86: # %bb.0:
More information about the llvm-commits mailing list