[llvm] [DAG] isKnownNeverZero - add DemandedElts for ISD::SMIN/SMAX (PR #184054)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 01:57:24 PST 2026


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/184054

>From b3e0dace8c6b419504962b7c2d2e879188d483a7 Mon Sep 17 00:00:00 2001
From: Aaron Smull <a.p.smull at gmail.com>
Date: Sun, 1 Mar 2026 17:53:54 -0800
Subject: [PATCH] DemandedElts for Smin and Smax in isKnownNeverZero

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +++---
 llvm/test/CodeGen/X86/known-never-zero.ll     | 53 ++++++++++++++++++-
 2 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 084700c50bd06..7c0d1943b39a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6272,34 +6272,34 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, const APInt &DemandedElts,
     // For smin/smax: If either operand is known negative/positive
     // respectively we don't need the other to be known at all.
   case ISD::SMAX: {
-    KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+    KnownBits Op1 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     if (Op1.isStrictlyPositive())
       return true;
 
-    KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+    KnownBits Op0 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Op0.isStrictlyPositive())
       return true;
 
     if (Op1.isNonZero() && Op0.isNonZero())
       return true;
 
-    return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
-           isKnownNeverZero(Op.getOperand(0), Depth + 1);
+    return isKnownNeverZero(Op.getOperand(1), DemandedElts, Depth + 1) &&
+           isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
   }
   case ISD::SMIN: {
-    KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+    KnownBits Op1 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     if (Op1.isNegative())
       return true;
 
-    KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+    KnownBits Op0 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Op0.isNegative())
       return true;
 
     if (Op1.isNonZero() && Op0.isNonZero())
       return true;
 
-    return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
-           isKnownNeverZero(Op.getOperand(0), Depth + 1);
+    return isKnownNeverZero(Op.getOperand(1), DemandedElts, Depth + 1) &&
+           isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
   }
   case ISD::UMIN:
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 12bb486d8dceb..45a52c7924bd3 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
 
@@ -541,6 +541,32 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @smin_known_never_zero_vec_element(<4 x i32> %x) {
+; X86-LABEL: smin_known_never_zero_vec_element:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [54,4294967273,12,1]
+; X86-NEXT:    movdqa %xmm1, %xmm2
+; X86-NEXT:    pcmpgtd %xmm0, %xmm2
+; X86-NEXT:    pand %xmm2, %xmm0
+; X86-NEXT:    pandn %xmm1, %xmm2
+; X86-NEXT:    por %xmm0, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
+; X86-NEXT:    movd %xmm0, %eax
+; X86-NEXT:    rep bsfl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: smin_known_never_zero_vec_element:
+; X64:       # %bb.0:
+; X64-NEXT:    vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vpextrd $1, %xmm0, %eax
+; X64-NEXT:    rep bsfl %eax, %eax
+; X64-NEXT:    retq
+  %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 -23, i32 12, i32 1>)
+  %el = extractelement <4 x i32> %z, i32 1
+  %r = call i32 @llvm.cttz.i32(i32 %el, i1 false)
+  ret i32 %r
+}
+
 define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
 ; X86-LABEL: smax_known_nonzero:
 ; X86:       # %bb.0:
@@ -653,6 +679,31 @@ define i32 @smax_known_zero(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @smax_known_never_zero_vec_element(<4 x i32> %x) {
+; X86-LABEL: smax_known_never_zero_vec_element:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [54,4294967273,4294967284,4294967295]
+; X86-NEXT:    movdqa %xmm0, %xmm2
+; X86-NEXT:    pcmpgtd %xmm1, %xmm2
+; X86-NEXT:    pand %xmm2, %xmm0
+; X86-NEXT:    pandn %xmm1, %xmm2
+; X86-NEXT:    por %xmm0, %xmm2
+; X86-NEXT:    movd %xmm2, %eax
+; X86-NEXT:    rep bsfl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: smax_known_never_zero_vec_element:
+; X64:       # %bb.0:
+; X64-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vmovd %xmm0, %eax
+; X64-NEXT:    rep bsfl %eax, %eax
+; X64-NEXT:    retq
+  %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 -23, i32 -12, i32 -1>)
+  %el = extractelement <4 x i32> %z, i32 0
+  %r = call i32 @llvm.cttz.i32(i32 %el, i1 false)
+  ret i32 %r
+}
+
 define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
 ; X86-LABEL: rotr_known_nonzero:
 ; X86:       # %bb.0:



More information about the llvm-commits mailing list