[llvm] [DAG] isKnownNeverNaN - fallback to computeKnownFPClass check (PR #189476)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 13:46:21 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-selectiondag
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Remove ConstantFPSDNode handling from isKnownNeverNaN and fallback to using computeKnownFPClass if there are no opcode matches in isKnownNeverNaN
The test check changes are due to isKnownNeverNaN not handling UNDEF/POISON but computeKnownFPClass does (POISON in particular now returns isKnownNeverNaN == true, preventing a ISD::FCANONICALIZE call in expandFMINNUM_FMAXNUM).
---
Full diff: https://github.com/llvm/llvm-project/pull/189476.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+5-8)
- (modified) llvm/test/CodeGen/AMDGPU/clamp.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+4-8)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index dd810a07c9aa8..3716de880cce3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6149,12 +6149,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
- // If the value is a constant, we can obviously see if it is a NaN or not.
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
- return !C->getValueAPF().isNaN() ||
- (SNaN && !C->getValueAPF().isSignaling());
- }
-
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::FADD:
@@ -6329,9 +6323,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN,
Depth);
}
-
- return false;
+ break;
}
+
+ FPClassTest NanMask = SNaN ? fcSNan : fcNan;
+ KnownFPClass Known = computeKnownFPClass(Op, DemandedElts, NanMask, Depth);
+ return Known.isKnownNever(NanMask);
}
bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index ffd52c2704409..e566d6291624f 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -3110,20 +3110,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dword v3, v[0:1]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
-; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
@@ -3939,9 +3939,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00, v2
+; GFX8-NEXT: v_max_f16_e32 v2, s0, v2
; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
-; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT: v_min_f16_e32 v3, s0, v3
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -4029,20 +4029,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dword v3, v[0:1]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
-; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 83640c1e4a97f..4bfd0387e06d0 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1081,7 +1081,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -1092,14 +1091,13 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v6
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
@@ -1107,7 +1105,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3
@@ -1118,7 +1115,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3
``````````
</details>
https://github.com/llvm/llvm-project/pull/189476
More information about the llvm-commits
mailing list