[llvm] [SDAG] Handle extract_subvector in isKnownNeverNaN (PR #131581)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 02:03:06 PDT 2025
https://github.com/tclin914 created https://github.com/llvm/llvm-project/pull/131581
Propagate nnan across extract_subvector.
>From a5e1b79a42366fa4ba1c5cc586c7e92cf3c442b3 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Mon, 17 Mar 2025 14:35:11 +0800
Subject: [PATCH] [SDAG] Handle extract_subvector in isKnownNeverNaN
Propagate nnan across extract_subvector.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +-
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 94 +++++++------------
.../RISCV/rvv/fixed-vectors-fmaximum.ll | 20 ++--
.../RISCV/rvv/fixed-vectors-fminimum.ll | 20 ++--
4 files changed, 52 insertions(+), 85 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index df30148b78b65..0b2d182375881 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5723,7 +5723,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
}
- case ISD::EXTRACT_VECTOR_ELT: {
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
case ISD::BUILD_VECTOR: {
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 9949b823dfec1..15cb404a3840a 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1287,55 +1287,55 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v7.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v11.l, v5.l
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v6, v7, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v6, v7, v8 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v9, v10, v11 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v2, v2 clamp
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-FAKE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; SDAG-GFX900: ; %bb.0:
-; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v7, v7 clamp
-; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; SDAG-GFX906: ; %bb.0:
-; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v7, v7 clamp
-; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_mov_b32_e32 v0, v6
+; GFX906-NEXT: v_mov_b32_e32 v1, v2
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-VI: ; %bb.0:
@@ -1426,28 +1426,6 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; GISEL-GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GISEL-GFX900: ; %bb.0:
-; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GISEL-GFX906: ; %bb.0:
-; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v2
-; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index e17ad303eddb8..04e73ac1ea956 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -290,17 +290,14 @@ define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
ret <2 x half> %v
}
-; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
-; ZVFH-NEXT: vfadd.vv v8, v8, v8
-; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0
-; ZVFH-NEXT: vmfeq.vv v0, v8, v8
-; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0
-; ZVFH-NEXT: vfmax.vv v8, v10, v8
+; ZVFH-NEXT: vmv1r.v v10, v9
+; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t
+; ZVFH-NEXT: vfmax.vv v8, v10, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
@@ -327,16 +324,13 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
ret <2 x half> %v
}
-; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
-; ZVFH-NEXT: vfadd.vv v9, v9, v9
-; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
-; ZVFH-NEXT: vmfeq.vv v0, v9, v9
-; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vmv1r.v v10, v8
+; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index 1362055c4dabf..a0334a9a5d20a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -290,17 +290,14 @@ define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
ret <2 x half> %v
}
-; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmin_v2f16_vv_nnana:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; ZVFH-NEXT: vmfeq.vv v0, v9, v9
-; ZVFH-NEXT: vfadd.vv v8, v8, v8
-; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0
-; ZVFH-NEXT: vmfeq.vv v0, v8, v8
-; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0
-; ZVFH-NEXT: vfmin.vv v8, v10, v8
+; ZVFH-NEXT: vmv1r.v v10, v9
+; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t
+; ZVFH-NEXT: vfmin.vv v8, v10, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana:
@@ -327,16 +324,13 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
ret <2 x half> %v
}
-; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmin_v2f16_vv_nnanb:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
-; ZVFH-NEXT: vfadd.vv v9, v9, v9
-; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0
-; ZVFH-NEXT: vmfeq.vv v0, v9, v9
-; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vmv1r.v v10, v8
+; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t
; ZVFH-NEXT: vfmin.vv v8, v8, v10
; ZVFH-NEXT: ret
;
More information about the llvm-commits
mailing list