[llvm] [SDAG] Handle insert_subvector in isKnownNeverNaN (PR #131989)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 02:44:29 PDT 2025
https://github.com/tclin914 created https://github.com/llvm/llvm-project/pull/131989
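Propagate nnan across insert_subvector: SelectionDAG::isKnownNeverNaN now reports an INSERT_SUBVECTOR node as never NaN when both its base vector and the inserted subvector are known never NaN, and treats undef operands as never NaN.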
>From 1b5c3fa0073a35056b51c0a7b8a8d5141bcdd478 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 19 Mar 2025 14:45:30 +0800
Subject: [PATCH 1/2] [SDAG] Pre-commit
---
.../RISCV/rvv/fixed-vectors-fmaximum.ll | 51 +++++++++++++++++++
.../RISCV/rvv/fixed-vectors-fminimum.ll | 51 +++++++++++++++++++
2 files changed, 102 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 04e73ac1ea956..ef81f0d18c8cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -357,3 +357,54 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
ret <2 x half> %v
}
+
+declare <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half>, <2 x half>, i64)
+
+define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
+; ZVFH-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vfadd.vv v9, v9, v9
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vslideup.vi v8, v9, 2
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v11, v11
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vi v11, v9, 2
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %d = fadd nnan <2 x half> %a, %a
+ %e = fadd nnan <2 x half> %b, %b
+ %f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
+ %g = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> %f, <2 x half> %e, i64 2)
+ %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %g, <4 x half> %c)
+ ret <4 x half> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index a0334a9a5d20a..8504c51901a77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -357,3 +357,54 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
%v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
ret <2 x half> %v
}
+
+declare <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half>, <2 x half>, i64)
+
+define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
+; ZVFH-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vfadd.vv v9, v9, v9
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vslideup.vi v8, v9, 2
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v11, v11
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vi v11, v9, 2
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %d = fadd nnan <2 x half> %a, %a
+ %e = fadd nnan <2 x half> %b, %b
+ %f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
+ %g = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> %f, <2 x half> %e, i64 2)
+ %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %g, <4 x half> %c)
+ ret <4 x half> %v
+}
>From 1ece27bec3727f004acf2e5a83753af667f2491b Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 19 Mar 2025 14:30:55 +0800
Subject: [PATCH 2/2] [SDAG] Handle insert_subvector in isKnownNeverNaN
Propagate nnan across insert_subvector.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 ++
llvm/test/CodeGen/AMDGPU/clamp.ll | 34 ++++----
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 86 +++++++------------
.../RISCV/rvv/fixed-vectors-fmaximum.ll | 6 +-
.../RISCV/rvv/fixed-vectors-fminimum.ll | 6 +-
5 files changed, 58 insertions(+), 80 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d1f92c9ef00e9..4b3dd5e0d6975 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5625,6 +5625,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
(SNaN && !C->getValueAPF().isSignaling());
}
+ if (Op.isUndef())
+ return true;
+
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::FADD:
@@ -5727,6 +5730,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::EXTRACT_SUBVECTOR: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
+ case ISD::INSERT_SUBVECTOR:
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
case ISD::BUILD_VECTOR: {
for (const SDValue &Opnd : Op->ops())
if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 17c84d7371de1..a08228fc919b8 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -2986,14 +2986,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
-; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
+; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
+; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3006,20 +3006,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dword v3, v[0:1]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
-; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
@@ -3747,16 +3747,16 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX6-NEXT: v_max_f32_e32 v3, 0x7fc00000, v3
+; GFX6-NEXT: v_max_f32_e32 v3, s0, v3
+; GFX6-NEXT: v_max_f32_e32 v2, 0, v2
; GFX6-NEXT: v_min_f32_e32 v3, 1.0, v3
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX6-NEXT: v_med3_f32 v2, v2, 0, v4
+; GFX6-NEXT: v_min_f32_e32 v2, s0, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -3779,9 +3779,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00, v2
+; GFX8-NEXT: v_max_f16_e32 v2, s0, v2
; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
-; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT: v_min_f16_e32 v3, s0, v3
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -3845,14 +3845,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
-; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
+; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
+; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3865,20 +3865,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_dword v3, v[0:1]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
-; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 15cb404a3840a..7068ef9f99d73 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1071,55 +1071,51 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v6, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v7, v8 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100-FAKE16: ; %bb.0:
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v6
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; SDAG-GFX900: ; %bb.0:
-; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; GFX900-NEXT: v_mov_b32_e32 v0, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; SDAG-GFX906: ; %bb.0:
-; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
+; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
+; GFX906-NEXT: v_mov_b32_e32 v0, v3
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-VI: ; %bb.0:
@@ -1193,26 +1189,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v6
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; GISEL-GFX900: ; %bb.0:
-; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; GISEL-GFX906: ; %bb.0:
-; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index ef81f0d18c8cc..c6cd366497218 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -367,12 +367,10 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
; ZVFH-NEXT: vfadd.vv v8, v8, v8
; ZVFH-NEXT: vfadd.vv v9, v9, v9
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT: vslideup.vi v8, v9, 2
-; ZVFH-NEXT: vmfeq.vv v0, v8, v8
-; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vslideup.vi v8, v9, 2
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: vfmax.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index 8504c51901a77..568923db83591 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -367,12 +367,10 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
; ZVFH-NEXT: vfadd.vv v8, v8, v8
; ZVFH-NEXT: vfadd.vv v9, v9, v9
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT: vslideup.vi v8, v9, 2
-; ZVFH-NEXT: vmfeq.vv v0, v8, v8
-; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vslideup.vi v8, v9, 2
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
-; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: vfmin.vv v8, v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
More information about the llvm-commits mailing list.