[llvm] [DAG] canCreateUndefOrPoison - add handling for CTTZ/CTLZ nodes (PR #146361)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 08:57:49 PDT 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/146361
>From 0df3a687442ecbf10d68b45ae8f289a19207062f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Jun 2025 15:05:03 +0100
Subject: [PATCH 1/2] [DAG] canCreateUndefOrPoison - add handling for CTTZ/CTLZ nodes
ISD::CTTZ/CTLZ nodes handle all input values and do not create undef/poison.
The *_ZERO_UNDEF variants' results are only non-poison if the input is non-poison and non-zero.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 ++++
llvm/test/CodeGen/X86/freeze-unary.ll | 55 ++++++-------------
2 files changed, 27 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 93cfe6f02bc84..a2b4300c946b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5543,6 +5543,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FSHL:
case ISD::FSHR:
case ISD::BSWAP:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
case ISD::CTPOP:
case ISD::BITREVERSE:
case ISD::PARITY:
@@ -5618,6 +5620,14 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
PoisonOnly, Depth + 1) ||
!getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1);
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ_ZERO_UNDEF:
+ // If the amount is zero then the result will be poison.
+ // TODO: Add isKnownNeverZero DemandedElts handling.
+ return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
+ PoisonOnly, Depth + 1) ||
+ !isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
case ISD::SCALAR_TO_VECTOR:
// Check if we demand any upper (undef) elements.
return !PoisonOnly && DemandedElts.ugt(1);
diff --git a/llvm/test/CodeGen/X86/freeze-unary.ll b/llvm/test/CodeGen/X86/freeze-unary.ll
index 2e707a4fee82f..4ce88f0582412 100644
--- a/llvm/test/CodeGen/X86/freeze-unary.ll
+++ b/llvm/test/CodeGen/X86/freeze-unary.ll
@@ -129,24 +129,17 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
define i32 @freeze_ctlz(i32 %a0) nounwind {
; X86-LABEL: freeze_ctlz:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bsrl %eax, %ecx
-; X86-NEXT: movl $63, %edx
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %edx, %eax
+; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $63, %eax
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: freeze_ctlz:
; X64: # %bb.0:
-; X64-NEXT: movl $63, %ecx
-; X64-NEXT: bsrl %edi, %ecx
-; X64-NEXT: xorl $31, %ecx
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: movl $63, %eax
+; X64-NEXT: bsrl %edi, %eax
+; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
%x = call i32 @llvm.ctlz.i32(i32 %a0, i1 0)
%f = freeze i32 %x
@@ -187,21 +180,15 @@ define i32 @freeze_ctlz_undef_nonzero(i32 %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
-; X86-NEXT: bsrl %eax, %ecx
-; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: bsrl %eax, %eax
+; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: freeze_ctlz_undef_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
-; X64-NEXT: bsrl %edi, %ecx
-; X64-NEXT: xorl $31, %ecx
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: bsrl %edi, %eax
+; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
%f0 = freeze i32 %a0
%y = or i32 %f0, 1
@@ -215,19 +202,15 @@ define i32 @freeze_ctlz_undef_nonzero(i32 %a0) nounwind {
define i32 @freeze_cttz(i32 %a0) nounwind {
; X86-LABEL: freeze_cttz:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bsfl %eax, %eax
-; X86-NEXT: movl $32, %ecx
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: freeze_cttz:
; X64: # %bb.0:
-; X64-NEXT: movl $32, %ecx
; X64-NEXT: movl $32, %eax
-; X64-NEXT: bsfl %edi, %eax
-; X64-NEXT: cmovel %ecx, %eax
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = call i32 @llvm.cttz.i32(i32 %a0, i1 0)
%f = freeze i32 %x
@@ -264,17 +247,13 @@ define i32 @freeze_cttz_undef_nonzero(i32 %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: freeze_cttz_undef_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
-; X64-NEXT: bsfl %edi, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%f0 = freeze i32 %a0
%y = or i32 %f0, 1
>From 49249dd3b6250963367c3125306ecb85ab65dfe8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Jun 2025 16:57:33 +0100
Subject: [PATCH 2/2] Remove ISD::CTLZ_ZERO_UNDEF/CTTZ_ZERO_UNDEF handling
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 -------
llvm/test/CodeGen/X86/freeze-unary.ll | 22 ++++++++++++++-----
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a2b4300c946b8..ad941a1964683 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5620,14 +5620,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
PoisonOnly, Depth + 1) ||
!getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1);
- case ISD::CTTZ_ZERO_UNDEF:
- case ISD::CTLZ_ZERO_UNDEF:
- // If the amount is zero then the result will be poison.
- // TODO: Add isKnownNeverZero DemandedElts handling.
- return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
- PoisonOnly, Depth + 1) ||
- !isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
case ISD::SCALAR_TO_VECTOR:
// Check if we demand any upper (undef) elements.
return !PoisonOnly && DemandedElts.ugt(1);
diff --git a/llvm/test/CodeGen/X86/freeze-unary.ll b/llvm/test/CodeGen/X86/freeze-unary.ll
index 4ce88f0582412..3d1f676115069 100644
--- a/llvm/test/CodeGen/X86/freeze-unary.ll
+++ b/llvm/test/CodeGen/X86/freeze-unary.ll
@@ -180,15 +180,21 @@ define i32 @freeze_ctlz_undef_nonzero(i32 %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $31, %eax
+; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: freeze_ctlz_undef_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
-; X64-NEXT: bsrl %edi, %eax
-; X64-NEXT: xorl $31, %eax
+; X64-NEXT: bsrl %edi, %ecx
+; X64-NEXT: xorl $31, %ecx
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%f0 = freeze i32 %a0
%y = or i32 %f0, 1
@@ -247,13 +253,17 @@ define i32 @freeze_cttz_undef_nonzero(i32 %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
-; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: bsfl %eax, %ecx
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: freeze_cttz_undef_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
-; X64-NEXT: rep bsfl %edi, %eax
+; X64-NEXT: bsfl %edi, %ecx
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%f0 = freeze i32 %a0
%y = or i32 %f0, 1
More information about the llvm-commits mailing list