[llvm] [DAG] canCreateUndefOrPoison - add handling for CTTZ/CTLZ nodes (PR #146361)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 30 08:57:49 PDT 2025


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/146361

>From 0df3a687442ecbf10d68b45ae8f289a19207062f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Jun 2025 15:05:03 +0100
Subject: [PATCH 1/2] [DAG] canCreateUndefOrPoison - add handling for CTTZ/CTLZ
 nodes

ISD::CTTZ/CTLZ nodes are defined for all input values (including zero) and so do not create undef/poison.

The *_ZERO_UNDEF variants only produce a non-poison result if the input is non-poison and non-zero.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 ++++
 llvm/test/CodeGen/X86/freeze-unary.ll         | 55 ++++++-------------
 2 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 93cfe6f02bc84..a2b4300c946b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5543,6 +5543,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::FSHL:
   case ISD::FSHR:
   case ISD::BSWAP:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
   case ISD::CTPOP:
   case ISD::BITREVERSE:
   case ISD::PARITY:
@@ -5618,6 +5620,14 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
                                              PoisonOnly, Depth + 1) ||
            !getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1);
 
+  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTLZ_ZERO_UNDEF:
+    // If the amount is zero then the result will be poison.
+    // TODO: Add isKnownNeverZero DemandedElts handling.
+    return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
+                                             PoisonOnly, Depth + 1) ||
+           !isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
   case ISD::SCALAR_TO_VECTOR:
     // Check if we demand any upper (undef) elements.
     return !PoisonOnly && DemandedElts.ugt(1);
diff --git a/llvm/test/CodeGen/X86/freeze-unary.ll b/llvm/test/CodeGen/X86/freeze-unary.ll
index 2e707a4fee82f..4ce88f0582412 100644
--- a/llvm/test/CodeGen/X86/freeze-unary.ll
+++ b/llvm/test/CodeGen/X86/freeze-unary.ll
@@ -129,24 +129,17 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
 define i32 @freeze_ctlz(i32 %a0) nounwind {
 ; X86-LABEL: freeze_ctlz:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bsrl %eax, %ecx
-; X86-NEXT:    movl $63, %edx
-; X86-NEXT:    cmovnel %ecx, %edx
-; X86-NEXT:    xorl $31, %edx
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    movl $32, %eax
-; X86-NEXT:    cmovnel %edx, %eax
+; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $63, %eax
+; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    xorl $31, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: freeze_ctlz:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $63, %ecx
-; X64-NEXT:    bsrl %edi, %ecx
-; X64-NEXT:    xorl $31, %ecx
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    cmovnel %ecx, %eax
+; X64-NEXT:    movl $63, %eax
+; X64-NEXT:    bsrl %edi, %eax
+; X64-NEXT:    xorl $31, %eax
 ; X64-NEXT:    retq
   %x = call i32 @llvm.ctlz.i32(i32 %a0, i1 0)
   %f = freeze i32 %x
@@ -187,21 +180,15 @@ define i32 @freeze_ctlz_undef_nonzero(i32 %a0) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl $1, %eax
-; X86-NEXT:    bsrl %eax, %ecx
-; X86-NEXT:    xorl $31, %ecx
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    movl $32, %eax
-; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    bsrl %eax, %eax
+; X86-NEXT:    xorl $31, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: freeze_ctlz_undef_nonzero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    orl $1, %edi
-; X64-NEXT:    bsrl %edi, %ecx
-; X64-NEXT:    xorl $31, %ecx
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    cmovnel %ecx, %eax
+; X64-NEXT:    bsrl %edi, %eax
+; X64-NEXT:    xorl $31, %eax
 ; X64-NEXT:    retq
   %f0 = freeze i32 %a0
   %y = or i32 %f0, 1
@@ -215,19 +202,15 @@ define i32 @freeze_ctlz_undef_nonzero(i32 %a0) nounwind {
 define i32 @freeze_cttz(i32 %a0) nounwind {
 ; X86-LABEL: freeze_cttz:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bsfl %eax, %eax
-; X86-NEXT:    movl $32, %ecx
-; X86-NEXT:    cmovel %ecx, %eax
-; X86-NEXT:    cmovel %ecx, %eax
+; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $32, %eax
+; X86-NEXT:    cmovnel %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: freeze_cttz:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %ecx
 ; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    bsfl %edi, %eax
-; X64-NEXT:    cmovel %ecx, %eax
+; X64-NEXT:    rep bsfl %edi, %eax
 ; X64-NEXT:    retq
   %x = call i32 @llvm.cttz.i32(i32 %a0, i1 0)
   %f = freeze i32 %x
@@ -264,17 +247,13 @@ define i32 @freeze_cttz_undef_nonzero(i32 %a0) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl $1, %eax
-; X86-NEXT:    bsfl %eax, %ecx
-; X86-NEXT:    movl $32, %eax
-; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    rep bsfl %eax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: freeze_cttz_undef_nonzero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    orl $1, %edi
-; X64-NEXT:    bsfl %edi, %ecx
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    cmovnel %ecx, %eax
+; X64-NEXT:    rep bsfl %edi, %eax
 ; X64-NEXT:    retq
   %f0 = freeze i32 %a0
   %y = or i32 %f0, 1

>From 49249dd3b6250963367c3125306ecb85ab65dfe8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Jun 2025 16:57:33 +0100
Subject: [PATCH 2/2] Remove ISD::CTLZ_ZERO_UNDEF/CTTZ_ZERO_UNDEF handling

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  8 -------
 llvm/test/CodeGen/X86/freeze-unary.ll         | 22 ++++++++++++++-----
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a2b4300c946b8..ad941a1964683 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5620,14 +5620,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
                                              PoisonOnly, Depth + 1) ||
            !getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1);
 
-  case ISD::CTTZ_ZERO_UNDEF:
-  case ISD::CTLZ_ZERO_UNDEF:
-    // If the amount is zero then the result will be poison.
-    // TODO: Add isKnownNeverZero DemandedElts handling.
-    return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
-                                             PoisonOnly, Depth + 1) ||
-           !isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
   case ISD::SCALAR_TO_VECTOR:
     // Check if we demand any upper (undef) elements.
     return !PoisonOnly && DemandedElts.ugt(1);
diff --git a/llvm/test/CodeGen/X86/freeze-unary.ll b/llvm/test/CodeGen/X86/freeze-unary.ll
index 4ce88f0582412..3d1f676115069 100644
--- a/llvm/test/CodeGen/X86/freeze-unary.ll
+++ b/llvm/test/CodeGen/X86/freeze-unary.ll
@@ -180,15 +180,21 @@ define i32 @freeze_ctlz_undef_nonzero(i32 %a0) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl $1, %eax
-; X86-NEXT:    bsrl %eax, %eax
-; X86-NEXT:    xorl $31, %eax
+; X86-NEXT:    bsrl %eax, %ecx
+; X86-NEXT:    xorl $31, %ecx
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    movl $32, %eax
+; X86-NEXT:    cmovnel %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: freeze_ctlz_undef_nonzero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    orl $1, %edi
-; X64-NEXT:    bsrl %edi, %eax
-; X64-NEXT:    xorl $31, %eax
+; X64-NEXT:    bsrl %edi, %ecx
+; X64-NEXT:    xorl $31, %ecx
+; X64-NEXT:    testl %edi, %edi
+; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    retq
   %f0 = freeze i32 %a0
   %y = or i32 %f0, 1
@@ -247,13 +253,17 @@ define i32 @freeze_cttz_undef_nonzero(i32 %a0) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl $1, %eax
-; X86-NEXT:    rep bsfl %eax, %eax
+; X86-NEXT:    bsfl %eax, %ecx
+; X86-NEXT:    movl $32, %eax
+; X86-NEXT:    cmovnel %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: freeze_cttz_undef_nonzero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    orl $1, %edi
-; X64-NEXT:    rep bsfl %edi, %eax
+; X64-NEXT:    bsfl %edi, %ecx
+; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    retq
   %f0 = freeze i32 %a0
   %y = or i32 %f0, 1



More information about the llvm-commits mailing list