[llvm] AMDGPU: Make v2f16 minimum/maximum legal for gfx950 (PR #117738)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 26 11:48:28 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117738

>From 7f94002e5c049a73e98ccacba7d3135e63bc0ab6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 28 May 2024 11:29:41 +0200
Subject: [PATCH] AMDGPU: Make v2f16 minimum/maximum legal for gfx950

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp    |    3 +
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td  |   11 +
 llvm/test/CodeGen/AMDGPU/fmaximum3.ll        | 1461 ++++++++++--------
 llvm/test/CodeGen/AMDGPU/fminimum3.ll        | 1461 ++++++++++--------
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll |  348 ++---
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll |  348 ++---
 6 files changed, 1850 insertions(+), 1782 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a24b6430378cc9..a212a9218ca0db 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -859,6 +859,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
     if (Subtarget->hasMinimum3Maximum3F32())
       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
+
+    if (Subtarget->hasMinimum3Maximum3PKF16())
+      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::v2f16, Legal);
   }
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ae5a6581a3b200..7d202de6643bcb 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -255,6 +255,17 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
   >;
 }
 
+class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
+                                            Instruction inst> : GCNPat<
+  (v2f16 (node (VOP3PMods v2f16:$src0, i32:$src0_mods), (VOP3PMods v2f16:$src1, i32:$src1_mods))),
+  (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
+>;
+
+let SubtargetPredicate = HasMinimum3Maximum3PKF16 in {
+def : MinimumMaximumByMinimum3Maximum3VOP3P<fminimum, V_PK_MINIMUM3_F16>;
+def : MinimumMaximumByMinimum3Maximum3VOP3P<fmaximum, V_PK_MAXIMUM3_F16>;
+}
+
 let SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals] in {
 
 // These are VOP3a-like opcodes which accept no omod.
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index e771e5801f2eda..f0fa621e3b4bc3 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1772,30 +1772,38 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v2, v0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v2, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f16:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v2, v1
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v2, v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %max0)
   ret <2 x half> %max1
@@ -1814,30 +1822,38 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v2f16_commute:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f16_commute:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f16_commute:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
   ret <2 x half> %max1
@@ -1859,32 +1875,43 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v2f16__fabs_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
-; GFX9-NEXT:    v_pk_max_f16 v3, v3, v4
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v4, v0, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f16__fabs_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
+; GFX940-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
+; GFX940-NEXT:    v_pk_max_f16 v3, v3, v4
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v4, v0, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f16__fabs_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
   %b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
   %c.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %c)
@@ -1906,30 +1933,38 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v2f16__fneg_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f16__fneg_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f16__fneg_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg <2 x half> %a
   %b.fneg = fneg <2 x half> %b
   %c.fneg = fneg <2 x half> %c
@@ -1951,30 +1986,38 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v2f16__inlineimm1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v3, v0, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v3, v0, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm1:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
   %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> %c)
   ret <2 x half> %max1
@@ -1993,30 +2036,38 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v2f16__inlineimm2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v4, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v2, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm2:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v2, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v4, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm2:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
   ret <2 x half> %max1
@@ -2037,42 +2088,51 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v3f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v5, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v4, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f16:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v5, v1
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v4, v2
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v5, v1, v1
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v4, v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
   %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %c, <3 x half> %max0)
   ret <3 x half> %max1
@@ -2093,42 +2153,51 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v3f16_commute:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f16_commute:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f16_commute:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
   %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
   ret <3 x half> %max1
@@ -2156,46 +2225,61 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v3f16__fabs_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
-; GFX9-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
-; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
-; GFX9-NEXT:    v_pk_max_f16 v7, v7, v9
-; GFX9-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v6, v6, v8
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX9-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v8, v0, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v11
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_perm_b32 v6, v9, v1, s0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX9-NEXT:    v_pk_max_f16 v6, v6, v10
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f16__fabs_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
+; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
+; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
+; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
+; GFX940-NEXT:    v_pk_max_f16 v7, v7, v9
+; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v8, v0, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v11
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX940-NEXT:    v_perm_b32 v6, v9, v1, s0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX940-NEXT:    v_pk_max_f16 v6, v6, v10
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f16__fabs_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX950-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX950-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX950-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
   %b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
   %c.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %c)
@@ -2219,42 +2303,51 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v3f16__fneg_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f16__fneg_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f16__fneg_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg <3 x half> %a
   %b.fneg = fneg <3 x half> %b
   %c.fneg = fneg <3 x half> %c
@@ -2278,39 +2371,48 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v3f16__inlineimm1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v7, v1, 2.0
-; GFX9-NEXT:    s_mov_b32 s1, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX9-NEXT:    s_movk_i32 s0, 0x7e00
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT:    v_perm_b32 v4, v5, v0, s1
-; GFX9-NEXT:    v_pk_max_f16 v4, v4, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX9-NEXT:    v_pack_b32_f16 v7, v1, s0
-; GFX9-NEXT:    v_pk_max_f16 v7, v7, v3
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v5, v0, s1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v7, v1, 2.0
+; GFX940-NEXT:    s_mov_b32 s1, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX940-NEXT:    s_movk_i32 s0, 0x7e00
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX940-NEXT:    v_perm_b32 v4, v5, v0, s1
+; GFX940-NEXT:    v_pk_max_f16 v4, v4, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
+; GFX940-NEXT:    v_pack_b32_f16 v7, v1, s0
+; GFX940-NEXT:    v_pk_max_f16 v7, v7, v3
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s1
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm1:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 2.0, 2.0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
   %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> %c)
   ret <3 x half> %max1
@@ -2331,42 +2433,51 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v3f16__inlineimm2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v4, s0
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, 4.0
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm2:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v4, s0
+; GFX940-NEXT:    v_pk_max_f16 v1, v1, 4.0
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm2:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 4.0, 4.0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
   %max1 = call <3 x half> @llvm.maximum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
   ret <3 x half> %max1
@@ -2387,48 +2498,57 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v4f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v5, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v4, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v4f16:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v5, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v4, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v4f16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v5, v1, v1
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v4, v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
   %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %c, <4 x half> %max0)
   ret <4 x half> %max1
@@ -2449,48 +2569,57 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v4f16_commute:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v5
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v4f16_commute:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v5
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v4f16_commute:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
   %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
   ret <4 x half> %max1
@@ -2518,52 +2647,67 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v4f16__fabs_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
-; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
-; GFX9-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
-; GFX9-NEXT:    v_pk_max_f16 v7, v7, v9
-; GFX9-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v6, v6, v8
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX9-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v8, v1, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v11
-; GFX9-NEXT:    v_perm_b32 v6, v9, v0, s0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v6, v6, v10
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX9-NEXT:    v_perm_b32 v1, v3, v1, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v7, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v4f16__fabs_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
+; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
+; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
+; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
+; GFX940-NEXT:    v_pk_max_f16 v7, v7, v9
+; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v8, v1, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v11
+; GFX940-NEXT:    v_perm_b32 v6, v9, v0, s0
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v6, v6, v10
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v7, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v4f16__fabs_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX950-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX950-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX950-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
   %b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
   %c.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %c)
@@ -2587,48 +2731,57 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v4f16__fneg_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v4f16__fneg_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v4f16__fneg_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg <4 x half> %a
   %b.fneg = fneg <4 x half> %b
   %c.fneg = fneg <4 x half> %c
@@ -2652,46 +2805,55 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v4f16__inlineimm1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT:    v_perm_b32 v4, v8, v1, s0
-; GFX9-NEXT:    v_pk_max_f16 v4, v4, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_perm_b32 v8, v5, v0, s0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX9-NEXT:    v_pk_max_f16 v8, v8, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    v_perm_b32 v1, v7, v1, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v5, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX940-NEXT:    v_perm_b32 v4, v8, v1, s0
+; GFX940-NEXT:    v_pk_max_f16 v4, v4, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_perm_b32 v8, v5, v0, s0
+; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX940-NEXT:    v_pk_max_f16 v8, v8, v2
+; GFX940-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm1:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
   %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> %c)
   ret <4 x half> %max1
@@ -2712,48 +2874,57 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fmaximum3_v4f16__inlineimm2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_max_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v4, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX9-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm2:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_max_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v4, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX940-NEXT:    v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm2:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
   %max1 = call <4 x half> @llvm.maximum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
   ret <4 x half> %max1
@@ -3523,30 +3694,38 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
 ; GFX12-NEXT:    v_pk_maximum_f16 v1, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_no_fmaximum3_v2f16__multi_use:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v1, v5, s0
-; GFX9-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_no_fmaximum3_v2f16__multi_use:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_max_f16 v3, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v1, v5, s0
+; GFX940-NEXT:    v_pk_max_f16 v3, v0, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_no_fmaximum3_v2f16__multi_use:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.maximum.f16(<2 x half> %max0, <2 x half> %c)
   %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 6a697aadbf3ba8..7a8a224c76a836 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -1772,30 +1772,38 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v2, v0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v2, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f16:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v2, v1
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v2, v5
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v2, v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %max0)
   ret <2 x half> %max1
@@ -1814,30 +1822,38 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v2f16_commute:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f16_commute:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f16_commute:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
   ret <2 x half> %max1
@@ -1859,32 +1875,43 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v2f16__fabs_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
-; GFX9-NEXT:    v_pk_min_f16 v3, v3, v4
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v4, v0, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f16__fabs_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v0
+; GFX940-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v1
+; GFX940-NEXT:    v_pk_min_f16 v3, v3, v4
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v2
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v1|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v4, v0, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v2|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f16__fabs_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
   %b.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %b)
   %c.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %c)
@@ -1906,30 +1933,38 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v2f16__fneg_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v5, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f16__fneg_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v5, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v5, -v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f16__fneg_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg <2 x half> %a
   %b.fneg = fneg <2 x half> %b
   %c.fneg = fneg <2 x half> %c
@@ -1951,30 +1986,38 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v2f16__inlineimm1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v3, v0, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v1
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f16__inlineimm1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v3, v0, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v1
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f16__inlineimm1:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
   %max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> %c)
   ret <2 x half> %max1
@@ -1993,30 +2036,38 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v2f16__inlineimm2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v0, v4, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v2, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v2f16__inlineimm2:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v2, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v3, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v0, v4, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v2, v3, v1, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v2, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v2f16__inlineimm2:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max0, <2 x half> <half 4.0, half 4.0>)
   ret <2 x half> %max1
@@ -2037,42 +2088,51 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v3f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v5, v1
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v4, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f16:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v5, v1
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v4, v2
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v5, v1, v1
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v4, v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
   %max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %c, <3 x half> %max0)
   ret <3 x half> %max1
@@ -2093,42 +2153,51 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v3f16_commute:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f16_commute:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f16_commute:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
   %max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
   ret <3 x half> %max1
@@ -2156,46 +2225,61 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v3f16__fabs_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
-; GFX9-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
-; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
-; GFX9-NEXT:    v_pk_min_f16 v7, v7, v9
-; GFX9-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v6, v6, v8
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
-; GFX9-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v8, v0, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v11
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_perm_b32 v6, v9, v1, s0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX9-NEXT:    v_pk_min_f16 v6, v6, v10
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f16__fabs_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v1
+; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v3
+; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v0
+; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v2
+; GFX940-NEXT:    v_pk_min_f16 v7, v7, v9
+; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v4
+; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v5
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v8, v0, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v11
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX940-NEXT:    v_perm_b32 v6, v9, v1, s0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX940-NEXT:    v_pk_min_f16 v6, v6, v10
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f16__fabs_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX950-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX950-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX950-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %a)
   %b.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %b)
   %c.fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %c)
@@ -2219,42 +2303,51 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v3f16__fneg_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f16__fneg_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f16__fneg_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg <3 x half> %a
   %b.fneg = fneg <3 x half> %b
   %c.fneg = fneg <3 x half> %c
@@ -2278,39 +2371,48 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v3f16__inlineimm1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v7, v1, 2.0
-; GFX9-NEXT:    s_mov_b32 s1, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX9-NEXT:    s_movk_i32 s0, 0x7e00
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT:    v_perm_b32 v4, v5, v0, s1
-; GFX9-NEXT:    v_pk_min_f16 v4, v4, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
-; GFX9-NEXT:    v_pack_b32_f16 v7, v1, s0
-; GFX9-NEXT:    v_pk_min_f16 v7, v7, v3
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v5, v0, s1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f16__inlineimm1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v7, v1, 2.0
+; GFX940-NEXT:    s_mov_b32 s1, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX940-NEXT:    s_movk_i32 s0, 0x7e00
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX940-NEXT:    v_perm_b32 v4, v5, v0, s1
+; GFX940-NEXT:    v_pk_min_f16 v4, v4, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v7, vcc
+; GFX940-NEXT:    v_pack_b32_f16 v7, v1, s0
+; GFX940-NEXT:    v_pk_min_f16 v7, v7, v3
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s1
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f16__inlineimm1:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, 2.0, 2.0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> <half 2.0, half 2.0, half 2.0>)
   %max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> %c)
   ret <3 x half> %max1
@@ -2331,42 +2433,51 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v3f16__inlineimm2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v4, s0
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, 4.0
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v3, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v3f16__inlineimm2:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v4, s0
+; GFX940-NEXT:    v_pk_min_f16 v1, v1, 4.0
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v3f16__inlineimm2:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, 4.0, 4.0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
   %max1 = call <3 x half> @llvm.minimum.v3f16(<3 x half> %max0, <3 x half> <half 4.0, half 4.0, half 4.0>)
   ret <3 x half> %max1
@@ -2387,48 +2498,57 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v5, v1
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v4f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v5, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v4, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v4f16:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v5, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v6
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v4, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v8
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v4f16:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v5, v1, v1
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v4, v0, v0
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
   %max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %c, <4 x half> %max0)
   ret <4 x half> %max1
@@ -2449,48 +2569,57 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v4f16_commute:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v6, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v5
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v4
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v4f16_commute:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v5
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v5
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v8, v4
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v4f16_commute:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
   %max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
   ret <4 x half> %max1
@@ -2518,52 +2647,67 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v4f16__fabs_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
-; GFX9-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
-; GFX9-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
-; GFX9-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
-; GFX9-NEXT:    v_pk_min_f16 v7, v7, v9
-; GFX9-NEXT:    v_mov_b32_e32 v12, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v6, v6, v8
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
-; GFX9-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v8, v1, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v11
-; GFX9-NEXT:    v_perm_b32 v6, v9, v0, s0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v6, v6, v10
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
-; GFX9-NEXT:    v_perm_b32 v1, v3, v1, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v7, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v4f16__fabs_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_and_b32_e32 v7, 0x7fff7fff, v0
+; GFX940-NEXT:    v_and_b32_e32 v9, 0x7fff7fff, v2
+; GFX940-NEXT:    v_and_b32_e32 v6, 0x7fff7fff, v1
+; GFX940-NEXT:    v_and_b32_e32 v8, 0x7fff7fff, v3
+; GFX940-NEXT:    v_pk_min_f16 v7, v7, v9
+; GFX940-NEXT:    v_mov_b32_e32 v12, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v9, v12, v9, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_and_b32_e32 v11, 0x7fff7fff, v5
+; GFX940-NEXT:    v_and_b32_e32 v10, 0x7fff7fff, v4
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v12, v8, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v0|, |v2|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, |v1|, |v3|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v6, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v8, v1, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v11
+; GFX940-NEXT:    v_perm_b32 v6, v9, v0, s0
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v6, v6, v10
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v1, |v5|
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v12, v2, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v0, |v4|
+; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v12, v6, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v7, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v4f16__fabs_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX950-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX950-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX950-NEXT:    v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX950-NEXT:    v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
   %b.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
   %c.fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %c)
@@ -2587,48 +2731,57 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v4f16__fneg_all:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v8, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
-; GFX9-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v4f16__fneg_all:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    v_mov_b32_e32 v7, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v7, v6, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v6, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, -v1, -v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v6, -v5
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v8, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX940-NEXT:    v_cmp_o_f16_e64 vcc, v8, -v4
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v7, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v4f16__fneg_all:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2 neg_lo:[1,1,1] neg_hi:[1,1,1]
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg <4 x half> %a
   %b.fneg = fneg <4 x half> %b
   %c.fneg = fneg <4 x half> %c
@@ -2652,46 +2805,55 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v4f16__inlineimm1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7e00
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; GFX9-NEXT:    v_perm_b32 v4, v8, v1, s0
-; GFX9-NEXT:    v_pk_min_f16 v4, v4, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_perm_b32 v8, v5, v0, s0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX9-NEXT:    v_pk_min_f16 v8, v8, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    v_perm_b32 v1, v7, v1, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v5, v0, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v4f16__inlineimm1:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_mov_b32_e32 v6, 0x7e00
+; GFX940-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v8, 16, v7
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; GFX940-NEXT:    v_perm_b32 v4, v8, v1, s0
+; GFX940-NEXT:    v_pk_min_f16 v4, v4, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    v_perm_b32 v8, v5, v0, s0
+; GFX940-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX940-NEXT:    v_pk_min_f16 v8, v8, v2
+; GFX940-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v6, v9, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    v_perm_b32 v1, v7, v1, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v5, v0, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v4f16__inlineimm1:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, 2.0, 2.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>)
   %max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> %c)
   ret <4 x half> %max1
@@ -2712,48 +2874,57 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_fminimum3_v4f16__inlineimm2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_min_f16 v2, v1, v3
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v1, v4, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v2, v0, v6, s0
-; GFX9-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
-; GFX9-NEXT:    v_perm_b32 v1, v1, v3, s0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v0, v4, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_fminimum3_v4f16__inlineimm2:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX940-NEXT:    v_mov_b32_e32 v5, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    v_pk_min_f16 v2, v1, v3
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v1, v4, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v4, v4
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v3, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v1, v1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v2, v0, v6, s0
+; GFX940-NEXT:    v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v6, v6
+; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s0
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v4, v5, v2, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_fminimum3_v4f16__inlineimm2:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, 4.0, 4.0 op_sel_hi:[1,0,0]
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
   %max1 = call <4 x half> @llvm.minimum.v4f16(<4 x half> %max0, <4 x half> <half 4.0, half 4.0, half 4.0, half 4.0>)
   ret <4 x half> %max1
@@ -3523,30 +3694,38 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
 ; GFX12-NEXT:    v_pk_minimum_f16 v1, v0, v2
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_no_fminimum3_v2f16__multi_use:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v3, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7e00
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX9-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v0, v1, v5, s0
-; GFX9-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX9-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
-; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT:    v_perm_b32 v1, v1, v5, s0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX940-LABEL: v_no_fminimum3_v2f16__multi_use:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    v_pk_min_f16 v3, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0x7e00
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
+; GFX940-NEXT:    s_mov_b32 s0, 0x5040100
+; GFX940-NEXT:    s_nop 0
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v0, v1, v5, s0
+; GFX940-NEXT:    v_pk_min_f16 v3, v0, v2
+; GFX940-NEXT:    v_cmp_o_f16_e32 vcc, v5, v2
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v5, v4, v3, vcc
+; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX940-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
+; GFX940-NEXT:    s_nop 1
+; GFX940-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX940-NEXT:    v_perm_b32 v1, v1, v5, s0
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_no_fminimum3_v2f16__multi_use:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_nop 0
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call <2 x half> @llvm.minimum.f16(<2 x half> %a, <2 x half> %b)
   %max1 = call <2 x half> @llvm.minimum.f16(<2 x half> %max0, <2 x half> %c)
   %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index e782f53cee6087..1d0367db701436 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -572,17 +572,7 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v2f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f16:
@@ -650,11 +640,17 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximum_v2f16__nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f16__nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f16__nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f16__nnan:
 ; GFX10:       ; %bb.0:
@@ -735,17 +731,7 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v2f16__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f16__nsz:
@@ -813,11 +799,17 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximum_v2f16__nnan_nsz:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f16__nnan_nsz:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f16__nnan_nsz:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f16__nnan_nsz:
 ; GFX10:       ; %bb.0:
@@ -918,21 +910,9 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX950-LABEL: s_maximum_v2f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_mov_b32_e32 v0, s1
-; GFX950-NEXT:    v_mov_b32_e32 v1, s1
-; GFX950-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX950-NEXT:    v_pk_max_f16 v1, s0, v1
-; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX950-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX950-NEXT:    v_mov_b32_e32 v3, s1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, s0, v3
-; GFX950-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT:    v_mov_b32_e32 v0, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, s1, s1
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX950-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; use v0
 ; GFX950-NEXT:    ;;#ASMEND
@@ -1061,20 +1041,8 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v3f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f16:
@@ -1156,12 +1124,19 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximum_v3f16__nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f16__nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f16__nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f16__nnan:
 ; GFX10:       ; %bb.0:
@@ -1258,20 +1233,8 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v3f16__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f16__nsz:
@@ -1353,12 +1316,19 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximum_v3f16__nnan_nsz:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f16__nnan_nsz:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f16__nnan_nsz:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f16__nnan_nsz:
 ; GFX10:       ; %bb.0:
@@ -1473,26 +1443,8 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v4f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f16:
@@ -1590,12 +1542,19 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximum_v4f16__nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f16__nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f16__nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f16__nnan:
 ; GFX10:       ; %bb.0:
@@ -1710,26 +1669,8 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v4f16__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v3, v0, v2
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f16__nsz:
@@ -1827,12 +1768,19 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximum_v4f16__nnan_nsz:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f16__nnan_nsz:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f16__nnan_nsz:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f16__nnan_nsz:
 ; GFX10:       ; %bb.0:
@@ -2009,44 +1957,10 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v8f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v8, v3, v7
-; GFX950-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v7, v2, v6
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
-; GFX950-NEXT:    v_perm_b32 v3, v3, v10, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v6, v1, v5
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX950-NEXT:    v_perm_b32 v2, v2, v8, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v5, v0, v4
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX950-NEXT:    v_perm_b32 v1, v1, v7, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v6, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_maximum3_f16 v2, v2, v6, v6
+; GFX950-NEXT:    v_pk_maximum3_f16 v3, v3, v7, v7
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v8f16:
@@ -2414,80 +2328,14 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX950-LABEL: v_maximum_v16f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_max_f16 v16, v7, v15
-; GFX950-NEXT:    v_mov_b32_e32 v17, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v7, v15
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v15, v6, v14
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v6, v14
-; GFX950-NEXT:    v_perm_b32 v7, v7, v18, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v14, v5, v13
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v5, v13
-; GFX950-NEXT:    v_perm_b32 v6, v6, v16, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v13, v4, v12
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v4, v12
-; GFX950-NEXT:    v_perm_b32 v5, v5, v15, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v12, v3, v11
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v3, v11
-; GFX950-NEXT:    v_perm_b32 v4, v4, v14, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v11, v2, v10
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
-; GFX950-NEXT:    v_perm_b32 v3, v3, v13, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v10, v1, v9
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
-; GFX950-NEXT:    v_perm_b32 v2, v2, v12, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_max_f16 v9, v0, v8
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX950-NEXT:    v_perm_b32 v1, v1, v11, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v10, s0
+; GFX950-NEXT:    v_pk_maximum3_f16 v0, v0, v8, v8
+; GFX950-NEXT:    v_pk_maximum3_f16 v1, v1, v9, v9
+; GFX950-NEXT:    v_pk_maximum3_f16 v2, v2, v10, v10
+; GFX950-NEXT:    v_pk_maximum3_f16 v3, v3, v11, v11
+; GFX950-NEXT:    v_pk_maximum3_f16 v4, v4, v12, v12
+; GFX950-NEXT:    v_pk_maximum3_f16 v5, v5, v13, v13
+; GFX950-NEXT:    v_pk_maximum3_f16 v6, v6, v14, v14
+; GFX950-NEXT:    v_pk_maximum3_f16 v7, v7, v15, v15
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v16f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index 329a85f91c2514..f8c2c54af27830 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -462,17 +462,7 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v2f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f16:
@@ -525,11 +515,17 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimum_v2f16__nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f16__nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f16__nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f16__nnan:
 ; GFX10:       ; %bb.0:
@@ -590,17 +586,7 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v2f16__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX950-NEXT:    v_mov_b32_e32 v3, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v1
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v3, v2, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f16__nsz:
@@ -653,11 +639,17 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimum_v2f16__nnan_nsz:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f16__nnan_nsz:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f16__nnan_nsz:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v1, v1
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f16__nnan_nsz:
 ; GFX10:       ; %bb.0:
@@ -731,21 +723,9 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX950-LABEL: s_minimum_v2f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_mov_b32_e32 v0, s1
-; GFX950-NEXT:    v_mov_b32_e32 v1, s1
-; GFX950-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX950-NEXT:    v_pk_min_f16 v1, s0, v1
-; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
-; GFX950-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX950-NEXT:    v_mov_b32_e32 v3, s1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, s0, v3
-; GFX950-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX950-NEXT:    v_mov_b32_e32 v0, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, s1, s1
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX950-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX950-NEXT:    ;;#ASMSTART
 ; GFX950-NEXT:    ; use v0
 ; GFX950-NEXT:    ;;#ASMEND
@@ -847,20 +827,8 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v3f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f16:
@@ -922,12 +890,19 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimum_v3f16__nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f16__nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f16__nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f16__nnan:
 ; GFX10:       ; %bb.0:
@@ -997,20 +972,8 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v3f16__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f16__nsz:
@@ -1072,12 +1035,19 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimum_v3f16__nnan_nsz:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f16__nnan_nsz:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f16__nnan_nsz:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f16__nnan_nsz:
 ; GFX10:       ; %bb.0:
@@ -1158,26 +1128,8 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v4f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f16:
@@ -1250,12 +1202,19 @@ define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimum_v4f16__nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f16__nnan:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f16__nnan:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f16__nnan:
 ; GFX10:       ; %bb.0:
@@ -1336,26 +1295,8 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v4f16__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX950-NEXT:    v_mov_b32_e32 v5, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v3
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v3, v0, v2
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v2
-; GFX950-NEXT:    v_perm_b32 v1, v1, v6, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v4, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f16__nsz:
@@ -1428,12 +1369,19 @@ define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimum_v4f16__nnan_nsz:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f16__nnan_nsz:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f16__nnan_nsz:
+; GFX950:       ; %bb.0:
+; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v3, v3
+; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f16__nnan_nsz:
 ; GFX10:       ; %bb.0:
@@ -1548,44 +1496,10 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v8f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v8, v3, v7
-; GFX950-NEXT:    v_mov_b32_e32 v9, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v3, v7
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v10, v9, v8, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v7 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v7, v2, v6
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v2, v6
-; GFX950-NEXT:    v_perm_b32 v3, v3, v10, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v8, v9, v7, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v6 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v6, v1, v5
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v5
-; GFX950-NEXT:    v_perm_b32 v2, v2, v8, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v5, v0, v4
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v6, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v4
-; GFX950-NEXT:    v_perm_b32 v1, v1, v7, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v9, v5, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v6, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v4, v4
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v5, v5
+; GFX950-NEXT:    v_pk_minimum3_f16 v2, v2, v6, v6
+; GFX950-NEXT:    v_pk_minimum3_f16 v3, v3, v7, v7
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v8f16:
@@ -1833,80 +1747,14 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX950-LABEL: v_minimum_v16f16:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:    v_pk_min_f16 v16, v7, v15
-; GFX950-NEXT:    v_mov_b32_e32 v17, 0x7e00
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v7, v15
-; GFX950-NEXT:    s_mov_b32 s0, 0x5040100
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v18, v17, v16, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v7, v15 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v15, v6, v14
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v6, v14
-; GFX950-NEXT:    v_perm_b32 v7, v7, v18, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v16, v17, v15, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v6, v14 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v14, v5, v13
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v6, v17, v15, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v5, v13
-; GFX950-NEXT:    v_perm_b32 v6, v6, v16, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v17, v14, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v5, v13 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v13, v4, v12
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v17, v14, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v4, v12
-; GFX950-NEXT:    v_perm_b32 v5, v5, v15, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v14, v17, v13, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v4, v12 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v12, v3, v11
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v4, v17, v13, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v3, v11
-; GFX950-NEXT:    v_perm_b32 v4, v4, v14, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v17, v12, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v3, v11 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v11, v2, v10
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v17, v12, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v2, v10
-; GFX950-NEXT:    v_perm_b32 v3, v3, v13, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v12, v17, v11, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v2, v10 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v10, v1, v9
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v2, v17, v11, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v1, v9
-; GFX950-NEXT:    v_perm_b32 v2, v2, v12, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v17, v10, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v1, v9 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    v_pk_min_f16 v9, v0, v8
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v10, vcc
-; GFX950-NEXT:    v_cmp_o_f16_e32 vcc, v0, v8
-; GFX950-NEXT:    v_perm_b32 v1, v1, v11, s0
-; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e32 v10, v17, v9, vcc
-; GFX950-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX950-NEXT:    v_cmp_o_f16_sdwa vcc, v0, v8 src0_sel:WORD_1 src1_sel:WORD_1
-; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e32 v0, v17, v9, vcc
-; GFX950-NEXT:    v_perm_b32 v0, v0, v10, s0
+; GFX950-NEXT:    v_pk_minimum3_f16 v0, v0, v8, v8
+; GFX950-NEXT:    v_pk_minimum3_f16 v1, v1, v9, v9
+; GFX950-NEXT:    v_pk_minimum3_f16 v2, v2, v10, v10
+; GFX950-NEXT:    v_pk_minimum3_f16 v3, v3, v11, v11
+; GFX950-NEXT:    v_pk_minimum3_f16 v4, v4, v12, v12
+; GFX950-NEXT:    v_pk_minimum3_f16 v5, v5, v13, v13
+; GFX950-NEXT:    v_pk_minimum3_f16 v6, v6, v14, v14
+; GFX950-NEXT:    v_pk_minimum3_f16 v7, v7, v15, v15
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v16f16:



More information about the llvm-commits mailing list