[llvm] [DAG] isKnownNeverZero - add ISD::SRA/SRL DemandedElts handling and tests (PR #183577)
Pranshu Goyal via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 04:39:46 PST 2026
https://github.com/pranshoe updated https://github.com/llvm/llvm-project/pull/183577
>From a1e081a84df50394ce55df0518c588ace55bad4f Mon Sep 17 00:00:00 2001
From: pranshoe <pranshu.goyal71 at gmail.com>
Date: Sun, 1 Mar 2026 04:06:33 +0000
Subject: [PATCH 1/2] [DAG] isKnownNeverZero - add ISD::SRA/SRL DemandedElts handling
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 31a83dd6e0ec0..8d2fcebef855d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6295,12 +6295,14 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, const APInt &DemandedElts,
case ISD::SRA:
case ISD::SRL: {
if (Op->getFlags().hasExact())
- return isKnownNeverZero(Op.getOperand(0), Depth + 1);
- KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ return isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
+ KnownBits ValKnown =
+ computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (ValKnown.isNegative())
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
- APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1)
+ .getMaxValue();
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
!ValKnown.One.lshr(MaxCnt).isZero())
return true;
>From 947527304d0ce415f796c708fe3a5c3af08870ac Mon Sep 17 00:00:00 2001
From: pranshoe <pranshu.goyal71 at gmail.com>
Date: Sun, 1 Mar 2026 07:40:14 +0000
Subject: [PATCH 2/2] [DAG] isKnownNeverZero - add ISD::SRA/SRL DemandedElts tests
---
.../CodeGen/X86/sra-neverzero-demandelts.ll | 31 +++++++++++++++++++
.../CodeGen/X86/srl-neverzero-demandelts.ll | 31 +++++++++++++++++++
2 files changed, 62 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll
create mode 100644 llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll
diff --git a/llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll b/llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll
new file mode 100644
index 0000000000000..117eeeccf75b1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sra-neverzero-demandelts.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s
+
+define i32 @sra_neverzero(<4 x i32> %v) {
+; CHECK-LABEL: sra_neverzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: orl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT: sarl %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
+; CHECK-NEXT: retq
+ %forced_nonzero = or <4 x i32> %v, <i32 2147483648, i32 0, i32 0, i32 2147483648>
+ %shift = ashr <4 x i32> %forced_nonzero, <i32 1, i32 1, i32 1, i32 1>
+ %lane0 = extractelement <4 x i32> %shift, i32 0
+ %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+ ret i32 %res
+}
+
+define i32 @sra_maybezero(<4 x i32> %v) {
+; CHECK-LABEL: sra_maybezero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movd %xmm0, %ecx
+; CHECK-NEXT: sarl %ecx
+; CHECK-NEXT: movl $32, %eax
+; CHECK-NEXT: rep bsfl %ecx, %eax
+; CHECK-NEXT: retq
+ %shift = ashr <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
+ %lane0 = extractelement <4 x i32> %shift, i32 0
+ %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll b/llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll
new file mode 100644
index 0000000000000..319fb491f60cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/srl-neverzero-demandelts.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s
+
+define i32 @srl_neverzero(<4 x i32> %v) {
+; CHECK-LABEL: srl_neverzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: orl $2, %eax
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
+; CHECK-NEXT: retq
+ %forced_nonzero = or <4 x i32> %v, <i32 2, i32 0, i32 0, i32 2>
+ %shift = lshr <4 x i32> %forced_nonzero, <i32 1, i32 1, i32 1, i32 1>
+ %lane0 = extractelement <4 x i32> %shift, i32 0
+ %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+ ret i32 %res
+}
+
+define i32 @srl_maybezero(<4 x i32> %v) {
+; CHECK-LABEL: srl_maybezero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movd %xmm0, %ecx
+; CHECK-NEXT: shrl %ecx
+; CHECK-NEXT: movl $32, %eax
+; CHECK-NEXT: rep bsfl %ecx, %eax
+; CHECK-NEXT: retq
+ %shift = lshr <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
+ %lane0 = extractelement <4 x i32> %shift, i32 0
+ %res = call i32 @llvm.cttz.i32(i32 %lane0, i1 0)
+ ret i32 %res
+}
More information about the llvm-commits mailing list