[llvm] [DAG] isKnownNeverZero: Add DemandedElts handling for ROTL/ROTR/BITREVERSE/BSWAP/CTPOP/ABS (PR #184033)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 03:51:57 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/184033
>From 2859336292085d42da26be9ecc5fb898de5bb14a Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sun, 1 Mar 2026 14:54:12 -0500
Subject: [PATCH 1/5] [DAG] isKnownNeverZero: Add DemandedElts handling for
ROTL/ROTR/BITREVERSE/BSWAP/CTPOP/ABS
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
.../X86/known-never-zero-demanded-elts.ll | 78 +++++++++++++++++++
2 files changed, 79 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 31a83dd6e0ec0..c5750b36dc629 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6290,7 +6290,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, const APInt &DemandedElts,
case ISD::BSWAP:
case ISD::CTPOP:
case ISD::ABS:
- return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ return isKnownNeverZero(Op.getOperand(0), DemandedElts, Depth + 1);
case ISD::SRA:
case ISD::SRL: {
diff --git a/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll b/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
new file mode 100644
index 0000000000000..97725d0e4dddd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
@@ -0,0 +1,78 @@
+; RUN: llc -O2 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+; Helper: build vector with lane0 set, other lanes undef.
+
+declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
+declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
+declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+define i1 @ctpop_lane0_nonzero() {
+; CHECK-LABEL: ctpop_lane0_nonzero:
+; CHECK: mov{{.*}}1
+; CHECK: ret
+ %v0 = insertelement <4 x i8> undef, i8 8, i64 0 ; lane0 = 8 (non-zero)
+ %w = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %v0) ; ctpop(8)=1 -> non-zero
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @bswap_lane0_nonzero() {
+; CHECK-LABEL: bswap_lane0_nonzero:
+; CHECK: mov{{.*}}1
+; CHECK: ret
+ %v0 = insertelement <4 x i16> undef, i16 1, i64 0 ; lane0 = 1
+ %w = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v0) ; bswap(1)=256 -> non-zero
+ %e0 = extractelement <4 x i16> %w, i64 0
+ %cmp = icmp ne i16 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @bitreverse_lane0_nonzero() {
+; CHECK-LABEL: bitreverse_lane0_nonzero:
+; CHECK: mov{{.*}}1
+; CHECK: ret
+ %v0 = insertelement <4 x i8> undef, i8 1, i64 0 ; lane0 = 1
+ %w = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %v0) ; bitreverse(1)=0x80 -> non-zero
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @rotl_lane0_nonzero() {
+; CHECK-LABEL: rotl_lane0_nonzero:
+; CHECK: mov{{.*}}1
+; CHECK: ret
+ %x = insertelement <4 x i8> undef, i8 2, i64 0 ; lane0 = 2 (non-zero)
+ %k = insertelement <4 x i8> undef, i8 1, i64 0 ; rotate by 1
+ %w = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @rotr_lane0_nonzero() {
+; CHECK-LABEL: rotr_lane0_nonzero:
+; CHECK: mov{{.*}}1
+; CHECK: ret
+ %x = insertelement <4 x i8> undef, i8 2, i64 0
+ %k = insertelement <4 x i8> undef, i8 1, i64 0
+ %w = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @abs_lane0_nonzero() {
+; CHECK-LABEL: abs_lane0_nonzero:
+; CHECK: mov{{.*}}1
+; CHECK: ret
+ %v0 = insertelement <4 x i8> undef, i8 -2, i64 0 ; lane0 = -2 (non-zero)
+ %w = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %v0, i1 false)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
\ No newline at end of file
>From e1022800edd244b75efb32b4dae5636afb7462d6 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sun, 1 Mar 2026 15:07:07 -0500
Subject: [PATCH 2/5] [DAG] isKnownNeverZero: Add DemandedElts handling for
ROTL/ROTR/BITREVERSE/BSWAP/CTPOP/ABS v2
---
.../X86/known-never-zero-demanded-elts.ll | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll b/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
index 97725d0e4dddd..34942bf8d9ab5 100644
--- a/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
@@ -1,20 +1,20 @@
; RUN: llc -O2 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
-; Helper: build vector with lane0 set, other lanes undef.
+; Helper: build vector with lane0 set, other lanes poison.
declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
-declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
define i1 @ctpop_lane0_nonzero() {
; CHECK-LABEL: ctpop_lane0_nonzero:
; CHECK: mov{{.*}}1
; CHECK: ret
- %v0 = insertelement <4 x i8> undef, i8 8, i64 0 ; lane0 = 8 (non-zero)
- %w = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %v0) ; ctpop(8)=1 -> non-zero
+ %v0 = insertelement <4 x i8> poison, i8 8, i64 0
+ %w = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %v0)
%e0 = extractelement <4 x i8> %w, i64 0
%cmp = icmp ne i8 %e0, 0
ret i1 %cmp
@@ -24,8 +24,8 @@ define i1 @bswap_lane0_nonzero() {
; CHECK-LABEL: bswap_lane0_nonzero:
; CHECK: mov{{.*}}1
; CHECK: ret
- %v0 = insertelement <4 x i16> undef, i16 1, i64 0 ; lane0 = 1
- %w = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v0) ; bswap(1)=256 -> non-zero
+ %v0 = insertelement <4 x i16> poison, i16 1, i64 0
+ %w = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v0)
%e0 = extractelement <4 x i16> %w, i64 0
%cmp = icmp ne i16 %e0, 0
ret i1 %cmp
@@ -35,8 +35,8 @@ define i1 @bitreverse_lane0_nonzero() {
; CHECK-LABEL: bitreverse_lane0_nonzero:
; CHECK: mov{{.*}}1
; CHECK: ret
- %v0 = insertelement <4 x i8> undef, i8 1, i64 0 ; lane0 = 1
- %w = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %v0) ; bitreverse(1)=0x80 -> non-zero
+ %v0 = insertelement <4 x i8> poison, i8 1, i64 0
+ %w = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %v0)
%e0 = extractelement <4 x i8> %w, i64 0
%cmp = icmp ne i8 %e0, 0
ret i1 %cmp
@@ -46,8 +46,8 @@ define i1 @rotl_lane0_nonzero() {
; CHECK-LABEL: rotl_lane0_nonzero:
; CHECK: mov{{.*}}1
; CHECK: ret
- %x = insertelement <4 x i8> undef, i8 2, i64 0 ; lane0 = 2 (non-zero)
- %k = insertelement <4 x i8> undef, i8 1, i64 0 ; rotate by 1
+ %x = insertelement <4 x i8> poison, i8 2, i64 0
+ %k = insertelement <4 x i8> poison, i8 1, i64 0
%w = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
%e0 = extractelement <4 x i8> %w, i64 0
%cmp = icmp ne i8 %e0, 0
@@ -58,8 +58,8 @@ define i1 @rotr_lane0_nonzero() {
; CHECK-LABEL: rotr_lane0_nonzero:
; CHECK: mov{{.*}}1
; CHECK: ret
- %x = insertelement <4 x i8> undef, i8 2, i64 0
- %k = insertelement <4 x i8> undef, i8 1, i64 0
+ %x = insertelement <4 x i8> poison, i8 2, i64 0
+ %k = insertelement <4 x i8> poison, i8 1, i64 0
%w = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
%e0 = extractelement <4 x i8> %w, i64 0
%cmp = icmp ne i8 %e0, 0
@@ -70,9 +70,9 @@ define i1 @abs_lane0_nonzero() {
; CHECK-LABEL: abs_lane0_nonzero:
; CHECK: mov{{.*}}1
; CHECK: ret
- %v0 = insertelement <4 x i8> undef, i8 -2, i64 0 ; lane0 = -2 (non-zero)
+ %v0 = insertelement <4 x i8> poison, i8 -2, i64 0
%w = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %v0, i1 false)
%e0 = extractelement <4 x i8> %w, i64 0
%cmp = icmp ne i8 %e0, 0
ret i1 %cmp
-}
\ No newline at end of file
+}
>From 89e5b5f5efa40a30b7fee2fe1e002d4f19e83e6f Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Fri, 6 Mar 2026 10:42:10 -0500
Subject: [PATCH 3/5] [DAG] isKnownNeverZero: Add DemandedElts handling for
ROTL/ROTR/BITREVERSE/BSWAP/CTPOP/ABS update script tests
---
llvm/test/CodeGen/X86/known-never-zero.ll | 110 ++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 12bb486d8dceb..a058d54bade25 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -20,6 +20,12 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare i32 @llvm.abs.i32(i32, i1)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
+declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
+declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
define i32 @or_known_nonzero(i32 %x) {
; X86-LABEL: or_known_nonzero:
@@ -1465,3 +1471,107 @@ define i32 @sext_maybe_zero(i16 %x) {
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
+
+define i1 @ctpop_lane0_nonzero() {
+; X86-LABEL: ctpop_lane0_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: ctpop_lane0_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+ %v0 = insertelement <4 x i8> poison, i8 8, i64 0
+ %w = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %v0)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @bswap_lane0_nonzero() {
+; X86-LABEL: bswap_lane0_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: bswap_lane0_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+ %v0 = insertelement <4 x i16> poison, i16 1, i64 0
+ %w = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v0)
+ %e0 = extractelement <4 x i16> %w, i64 0
+ %cmp = icmp ne i16 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @bitreverse_lane0_nonzero() {
+; X86-LABEL: bitreverse_lane0_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: bitreverse_lane0_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+ %v0 = insertelement <4 x i8> poison, i8 1, i64 0
+ %w = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %v0)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @rotl_lane0_nonzero() {
+; X86-LABEL: rotl_lane0_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: rotl_lane0_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+ %x = insertelement <4 x i8> poison, i8 2, i64 0
+ %k = insertelement <4 x i8> poison, i8 1, i64 0
+ %w = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @rotr_lane0_nonzero() {
+; X86-LABEL: rotr_lane0_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: rotr_lane0_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+ %x = insertelement <4 x i8> poison, i8 2, i64 0
+ %k = insertelement <4 x i8> poison, i8 1, i64 0
+ %w = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
+
+define i1 @abs_lane0_nonzero() {
+; X86-LABEL: abs_lane0_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: abs_lane0_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+ %v0 = insertelement <4 x i8> poison, i8 -2, i64 0
+ %w = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %v0, i1 false)
+ %e0 = extractelement <4 x i8> %w, i64 0
+ %cmp = icmp ne i8 %e0, 0
+ ret i1 %cmp
+}
>From 215756cb95f1a8993de67e2e370237800bef7d8e Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Fri, 6 Mar 2026 11:43:43 -0500
Subject: [PATCH 4/5] [DAG] isKnownNeverZero: Add DemandedElts handling for
ROTL/ROTR/BITREVERSE/BSWAP/CTPOP/ABS v3
---
.../X86/known-never-zero-demanded-elts.ll | 78 -------------------
1 file changed, 78 deletions(-)
delete mode 100644 llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
diff --git a/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll b/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
deleted file mode 100644
index 34942bf8d9ab5..0000000000000
--- a/llvm/test/CodeGen/X86/known-never-zero-demanded-elts.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: llc -O2 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
-
-; Helper: build vector with lane0 set, other lanes poison.
-
-declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
-declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
-declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
-declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
-declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)
-declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
-
-define i1 @ctpop_lane0_nonzero() {
-; CHECK-LABEL: ctpop_lane0_nonzero:
-; CHECK: mov{{.*}}1
-; CHECK: ret
- %v0 = insertelement <4 x i8> poison, i8 8, i64 0
- %w = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %v0)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @bswap_lane0_nonzero() {
-; CHECK-LABEL: bswap_lane0_nonzero:
-; CHECK: mov{{.*}}1
-; CHECK: ret
- %v0 = insertelement <4 x i16> poison, i16 1, i64 0
- %w = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v0)
- %e0 = extractelement <4 x i16> %w, i64 0
- %cmp = icmp ne i16 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @bitreverse_lane0_nonzero() {
-; CHECK-LABEL: bitreverse_lane0_nonzero:
-; CHECK: mov{{.*}}1
-; CHECK: ret
- %v0 = insertelement <4 x i8> poison, i8 1, i64 0
- %w = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %v0)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @rotl_lane0_nonzero() {
-; CHECK-LABEL: rotl_lane0_nonzero:
-; CHECK: mov{{.*}}1
-; CHECK: ret
- %x = insertelement <4 x i8> poison, i8 2, i64 0
- %k = insertelement <4 x i8> poison, i8 1, i64 0
- %w = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @rotr_lane0_nonzero() {
-; CHECK-LABEL: rotr_lane0_nonzero:
-; CHECK: mov{{.*}}1
-; CHECK: ret
- %x = insertelement <4 x i8> poison, i8 2, i64 0
- %k = insertelement <4 x i8> poison, i8 1, i64 0
- %w = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @abs_lane0_nonzero() {
-; CHECK-LABEL: abs_lane0_nonzero:
-; CHECK: mov{{.*}}1
-; CHECK: ret
- %v0 = insertelement <4 x i8> poison, i8 -2, i64 0
- %w = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %v0, i1 false)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
>From 4fc5a33e9c7ee3fdfb86fd6a808ccc5ca6c6ca54 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sat, 7 Mar 2026 06:49:16 -0500
Subject: [PATCH 5/5] [DAG] isKnownNeverZero: Add DemandedElts handling for
ROTL/ROTR/BITREVERSE/BSWAP/CTPOP/ABS v4
---
llvm/test/CodeGen/X86/known-never-zero.ll | 157 +++-------------------
1 file changed, 18 insertions(+), 139 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 27738295fd450..8327a90bdeda5 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1075,9 +1075,7 @@ define i32 @rotr_with_fshr_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p
; X86-NEXT: por %xmm3, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_known_nonzero_vec:
@@ -1099,9 +1097,8 @@ define i32 @rotr_with_fshr_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT: vpor %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
-; X64-NEXT: vmovd %xmm0, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: rep bsfl %ecx, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 256, i32 0, i32 0, i32 0>
%z = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %y)
@@ -1235,9 +1232,7 @@ define i32 @rotl_with_fshl_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p
; X86-NEXT: por %xmm3, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_known_nonzero_vec:
@@ -1257,9 +1252,8 @@ define i32 @rotl_with_fshl_known_nonzero_vec(<4 x i32> %xx, <4 x i32> %y, ptr %p
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT: vpor %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
-; X64-NEXT: vmovd %xmm0, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: rep bsfl %ecx, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 256, i32 0, i32 0, i32 0>
%z = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %y)
@@ -2192,9 +2186,7 @@ define i32 @abs_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: abs_known_nonzero_vec:
@@ -2202,9 +2194,8 @@ define i32 @abs_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpabsd %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
-; X64-NEXT: vmovd %xmm0, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: rep bsfl %ecx, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 0)
@@ -2276,9 +2267,7 @@ define i32 @bswap_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-NEXT: packuswb %xmm2, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: bswap_known_nonzero_vec:
@@ -2286,9 +2275,8 @@ define i32 @bswap_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,u,u,u,u,11,10,9,8,15,14,13,12]
; X64-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
-; X64-NEXT: vmovd %xmm0, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: rep bsfl %ecx, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %x)
@@ -2410,9 +2398,7 @@ define i32 @bitreverse_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitreverse_known_nonzero_vec:
@@ -2429,9 +2415,8 @@ define i32 @bitreverse_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X64-NEXT: vpshufb %xmm0, %xmm1, %xmm0
; X64-NEXT: vpor %xmm0, %xmm2, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
-; X64-NEXT: vmovd %xmm0, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: rep bsfl %ecx, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %x)
@@ -2572,9 +2557,7 @@ define i32 @ctpop_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X86-NEXT: packuswb %xmm2, %xmm1
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: movd %xmm1, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctpop_known_nonzero_vec:
@@ -2595,9 +2578,8 @@ define i32 @ctpop_known_nonzero_vec(<4 x i32> %xx, ptr %p) nounwind {
; X64-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; X64-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdi)
-; X64-NEXT: vmovd %xmm0, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: rep bsfl %ecx, %eax
+; X64-NEXT: vmovd %xmm0, %eax
+; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = or <4 x i32> %xx, <i32 64, i32 -1, i32 0, i32 0>
%z = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
@@ -2742,109 +2724,6 @@ define i32 @sext_maybe_zero(i16 %x) {
ret i32 %r
}
-define i1 @ctpop_lane0_nonzero() {
-; X86-LABEL: ctpop_lane0_nonzero:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
-;
-; X64-LABEL: ctpop_lane0_nonzero:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %v0 = insertelement <4 x i8> poison, i8 8, i64 0
- %w = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %v0)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @bswap_lane0_nonzero() {
-; X86-LABEL: bswap_lane0_nonzero:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
-;
-; X64-LABEL: bswap_lane0_nonzero:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %v0 = insertelement <4 x i16> poison, i16 1, i64 0
- %w = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v0)
- %e0 = extractelement <4 x i16> %w, i64 0
- %cmp = icmp ne i16 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @bitreverse_lane0_nonzero() {
-; X86-LABEL: bitreverse_lane0_nonzero:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
-;
-; X64-LABEL: bitreverse_lane0_nonzero:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %v0 = insertelement <4 x i8> poison, i8 1, i64 0
- %w = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %v0)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @rotl_lane0_nonzero() {
-; X86-LABEL: rotl_lane0_nonzero:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
-;
-; X64-LABEL: rotl_lane0_nonzero:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %x = insertelement <4 x i8> poison, i8 2, i64 0
- %k = insertelement <4 x i8> poison, i8 1, i64 0
- %w = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @rotr_lane0_nonzero() {
-; X86-LABEL: rotr_lane0_nonzero:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
-;
-; X64-LABEL: rotr_lane0_nonzero:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %x = insertelement <4 x i8> poison, i8 2, i64 0
- %k = insertelement <4 x i8> poison, i8 1, i64 0
- %w = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %x, <4 x i8> %x, <4 x i8> %k)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
-
-define i1 @abs_lane0_nonzero() {
-; X86-LABEL: abs_lane0_nonzero:
-; X86: # %bb.0:
-; X86-NEXT: movb $1, %al
-; X86-NEXT: retl
-;
-; X64-LABEL: abs_lane0_nonzero:
-; X64: # %bb.0:
-; X64-NEXT: movb $1, %al
-; X64-NEXT: retq
- %v0 = insertelement <4 x i8> poison, i8 -2, i64 0
- %w = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %v0, i1 false)
- %e0 = extractelement <4 x i8> %w, i64 0
- %cmp = icmp ne i8 %e0, 0
- ret i1 %cmp
-}
define i32 @test_zext_demanded_elts(<4 x i32> %a0, ptr %p) {
; X86-LABEL: test_zext_demanded_elts:
; X86: # %bb.0:
More information about the llvm-commits mailing list