[llvm] [DAG] isKnownNeverZero - add ISD::SRA/SRL DemandedElts handling and tests (PR #183577)

Pranshu Goyal via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 04:39:46 PST 2026


https://github.com/pranshoe updated https://github.com/llvm/llvm-project/pull/183577

>From a1e081a84df50394ce55df0518c588ace55bad4f Mon Sep 17 00:00:00 2001
From: pranshoe <pranshu.goyal71 at gmail.com>
Date: Sun, 1 Mar 2026 04:06:33 +0000
Subject: [PATCH 1/2] [DAG] isKnownNeverZero - add ISD::SRA/SRL DemandedElts
 handling

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 31a83dd6e0ec0..8d2fcebef855d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6295,12 +6295,14 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, const APInt &DemandedElts,
   case ISD::SRA:
   case ISD::SRL: {
     if (Op->getFlags().hasExact())
-      return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-    KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+      return isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits ValKnown =
+        computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (ValKnown.isNegative())
       return true;
     // If max shift cnt of known ones is non-zero, result is non-zero.
-    APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+    APInt MaxCnt = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1)
+                       .getMaxValue();
     if (MaxCnt.ult(ValKnown.getBitWidth()) &&
         !ValKnown.One.lshr(MaxCnt).isZero())
       return true;

>From 947527304d0ce415f796c708fe3a5c3af08870ac Mon Sep 17 00:00:00 2001
From: pranshoe <pranshu.goyal71 at gmail.com>
Date: Sun, 1 Mar 2026 07:40:14 +0000
Subject: [PATCH 2/2] [DAG] isKnownNeverZero - add ISD::SRA/SRL DemandedElts
 tests

---
 .../CodeGen/X86/sra-neverzero-demandelts.ll   | 31 +++++++++++++++++++
 .../CodeGen/X86/srl-neverzero-demandelts.ll   | 31 +++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll
 create mode 100644 llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll

diff --git a/llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll b/llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll
new file mode 100644
index 0000000000000..117eeeccf75b1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc  -mtriple=x86_64-linux-gnu < %s | FileCheck %s
+
+define i32 @sra_neverzero(<4 x i32> %v) {
+; CHECK-LABEL: sra_neverzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    orl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    sarl %eax
+; CHECK-NEXT:    rep bsfl %eax, %eax
+; CHECK-NEXT:    retq
+  %forced_nonzero = or <4 x i32> %v, <i32 2147483648, i32 0, i32 0, i32 2147483648>
+  %shift = ashr <4 x i32> %forced_nonzero, <i32 1, i32 1, i32 1, i32 1>
+  %lane0 = extractelement <4 x i32> %shift, i32 0
+  %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+  ret i32 %res
+}
+
+define i32 @sra_maybezero(<4 x i32> %v) {
+; CHECK-LABEL: sra_maybezero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movd %xmm0, %ecx
+; CHECK-NEXT:    sarl %ecx
+; CHECK-NEXT:    movl $32, %eax
+; CHECK-NEXT:    rep bsfl %ecx, %eax
+; CHECK-NEXT:    retq
+  %shift = ashr <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
+  %lane0 = extractelement <4 x i32> %shift, i32 0
+  %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll b/llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll
new file mode 100644
index 0000000000000..319fb491f60cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc  -mtriple=x86_64-linux-gnu < %s | FileCheck %s
+
+define i32 @srl_neverzero(<4 x i32> %v) {
+; CHECK-LABEL: srl_neverzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    orl $2, %eax
+; CHECK-NEXT:    shrl %eax
+; CHECK-NEXT:    rep bsfl %eax, %eax
+; CHECK-NEXT:    retq
+  %forced_nonzero = or <4 x i32> %v, <i32 2, i32 0, i32 0, i32 2>
+  %shift = lshr <4 x i32> %forced_nonzero, <i32 1, i32 1, i32 1, i32 1>
+  %lane0 = extractelement <4 x i32> %shift, i32 0
+  %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+  ret i32 %res
+}
+
+define i32 @srl_maybezero(<4 x i32> %v) {
+; CHECK-LABEL: srl_maybezero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movd %xmm0, %ecx
+; CHECK-NEXT:    shrl %ecx
+; CHECK-NEXT:    movl $32, %eax
+; CHECK-NEXT:    rep bsfl %ecx, %eax
+; CHECK-NEXT:    retq
+  %shift = lshr <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
+  %lane0 = extractelement <4 x i32> %shift, i32 0
+  %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+  ret i32 %res
+}



More information about the llvm-commits mailing list