[llvm] AMDGPU: Directly select minimumnum/maximumnum with ieee_mode=0 (PR #141903)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu May 29 00:03:25 PDT 2025


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/141903

With IEEE mode disabled, the hardware min/max instructions follow the IR
rules for minimumnum/maximumnum, so we can avoid canonicalizing the inputs.
We lose the quieting of a signaling NaN if both inputs are NaNs, but that
is only required with strictfp.

>From 740cd37d6d0b40c3f829e7c10badbcf4f16eb9ed Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 9 May 2025 08:16:50 +0200
Subject: [PATCH] AMDGPU: Directly select minimumnum/maximumnum with
 ieee_mode=0

With IEEE mode disabled, the hardware min/max instructions follow the IR
rules for minimumnum/maximumnum, so we can avoid canonicalizing the inputs.
We lose the quieting of a signaling NaN if both inputs are NaNs, but that
is only required with strictfp.
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |    2 +
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   25 +-
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |    2 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   36 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |    1 +
 llvm/lib/Target/AMDGPU/SIInstructions.td      |   46 +
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll |   42 +-
 llvm/test/CodeGen/AMDGPU/maximumnum.ll        | 1442 +++++++----------
 llvm/test/CodeGen/AMDGPU/minimumnum.ll        | 1442 +++++++----------
 9 files changed, 1365 insertions(+), 1673 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 18a948d68e97b..7a50923ffedc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -92,6 +92,8 @@ def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().F
 def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
 def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
 def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
+def IEEEModeEnabled : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
+def IEEEModeDisabled : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
 def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 177750b639c67..ae530d35eff00 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -957,12 +957,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
   }
 
-  auto &MinNumMaxNum = getActionDefinitionsBuilder({
-      G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
-
-  // TODO: These should be custom lowered and are directly legal with IEEE=0
-  auto &MinimumNumMaximumNum =
-      getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
+  auto &MinNumMaxNum = getActionDefinitionsBuilder(
+      {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
+       G_FMAXNUM_IEEE});
 
   if (ST.hasVOP3PInsts()) {
     MinNumMaxNum.customFor(FPTypesPK16)
@@ -980,8 +977,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       .scalarize(0);
   }
 
-  MinimumNumMaximumNum.lower();
-
   if (ST.hasVOP3PInsts())
     FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
 
@@ -2160,6 +2155,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
     return legalizeFPTOI(MI, MRI, B, false);
   case TargetOpcode::G_FMINNUM:
   case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMINIMUMNUM:
+  case TargetOpcode::G_FMAXIMUMNUM:
   case TargetOpcode::G_FMINNUM_IEEE:
   case TargetOpcode::G_FMAXNUM_IEEE:
     return legalizeMinNumMaxNum(Helper, MI);
@@ -2739,9 +2736,17 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
                         MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
 
   // With ieee_mode disabled, the instructions have the correct behavior
-  // already for G_FMINNUM/G_FMAXNUM
-  if (!MFI->getMode().IEEE)
+  // already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
+  //
+  // FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
+  // enabled.
+  if (!MFI->getMode().IEEE) {
+    if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
+        MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
+      return true;
+
     return !IsIEEEOp;
+  }
 
   if (IsIEEEOp)
     return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index dd7aef8f0c583..4391a48ff2b68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4009,6 +4009,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_FMAXNUM:
   case AMDGPU::G_FMINIMUM:
   case AMDGPU::G_FMAXIMUM:
+  case AMDGPU::G_FMINIMUMNUM:
+  case AMDGPU::G_FMAXIMUMNUM:
   case AMDGPU::G_INTRINSIC_TRUNC:
   case AMDGPU::G_STRICT_FADD:
   case AMDGPU::G_STRICT_FSUB:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 74ca3e43fce3a..f161e5185e196 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -523,8 +523,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
                        Legal);
 
-  setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
-                     Custom);
+  setOperationAction(
+      {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
+      {MVT::f32, MVT::f64}, Custom);
 
   // These are really only legal for ieee_mode functions. We should be avoiding
   // them for functions that don't have ieee_mode enabled, so just say they are
@@ -756,7 +757,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     // allows matching fneg (fabs x) patterns)
     setOperationAction(ISD::FABS, MVT::v2f16, Legal);
 
-    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
+    setOperationAction(
+        {ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
+        MVT::f16, Custom);
     setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
 
     setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::FMINIMUMNUM,
@@ -810,8 +813,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
                          VT, Custom);
 
-    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
-                       Custom);
+    setOperationAction(
+        {ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
+        {MVT::v2f16, MVT::v4f16}, Custom);
 
     setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
     setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16, MVT::v4bf16},
@@ -6057,6 +6061,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FMINNUM:
   case ISD::FMAXNUM:
     return lowerFMINNUM_FMAXNUM(Op, DAG);
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
+    return lowerFMINIMUMNUM_FMAXIMUMNUM(Op, DAG);
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
     return lowerFMINIMUM_FMAXIMUM(Op, DAG);
@@ -6081,8 +6088,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FMUL:
   case ISD::FMINNUM_IEEE:
   case ISD::FMAXNUM_IEEE:
-  case ISD::FMINIMUMNUM:
-  case ISD::FMAXIMUMNUM:
   case ISD::UADDSAT:
   case ISD::USUBSAT:
   case ISD::SADDSAT:
@@ -6967,6 +6972,23 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
   return Op;
 }
 
+SDValue
+SITargetLowering::lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  bool IsIEEEMode = Info->getMode().IEEE;
+
+  if (IsIEEEMode)
+    return expandFMINIMUMNUM_FMAXIMUMNUM(Op.getNode(), DAG);
+
+  if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
+      VT == MVT::v16bf16)
+    return splitBinaryVectorOp(Op, DAG);
+  return Op;
+}
+
 SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c42366a1c04c8..532d1e46714e6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -146,6 +146,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   /// Custom lowering for ISD::FP_ROUND for MVT::f16.
   SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
   SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2e2913d88cc54..0cb3ba38e8016 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1376,6 +1376,52 @@ def : GCNPat <
       (i32 (V_MOV_B32_e32 (i32 0))), sub1)
 >;
 
+
+
+class FPBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
+  : GCNPat <(vt (node (vt (VOP3Mods vt:$src0, i32:$src0_mods)),
+                      (vt (VOP3Mods vt:$src1, i32:$src1_mods)))),
+    (inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+class FPPkBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
+  : GCNPat <(vt (node (VOP3PMods v2f16:$src0, i32:$src0_mods),
+                      (VOP3PMods v2f16:$src1, i32:$src1_mods))),
+  (inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE)
+>;
+
+/// With IEEE=0, signalingness is ignored and the non-nan input will
+/// be directly returned.
+let OtherPredicates = [IEEEModeDisabled] in {
+  def : FPBinOpPat<fminimumnum, f32, V_MIN_F32_e64>;
+  def : FPBinOpPat<fmaximumnum, f32, V_MAX_F32_e64>;
+  def : FPBinOpPat<fminimumnum, f64, V_MIN_F64_e64>;
+  def : FPBinOpPat<fmaximumnum, f64, V_MAX_F64_e64>;
+
+  let SubtargetPredicate = Has16BitInsts,
+      True16Predicate = NotHasTrue16BitInsts in {
+    def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_e64>;
+    def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_e64>;
+  }
+
+  let SubtargetPredicate = Has16BitInsts,
+      True16Predicate = UseRealTrue16Insts in {
+    def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_t16_e64>;
+    def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_t16_e64>;
+  }
+
+  let SubtargetPredicate = Has16BitInsts,
+      True16Predicate = UseFakeTrue16Insts in {
+    def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_fake16_e64>;
+    def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_fake16_e64>;
+  }
+
+  let SubtargetPredicate = HasVOP3PInsts in {
+    def : FPPkBinOpPat<fminimumnum, v2f16, V_PK_MIN_F16>;
+    def : FPPkBinOpPat<fmaximumnum, v2f16, V_PK_MAX_F16>;
+  }
+}
+
 /********** ============================================ **********/
 /********** Extraction, Insertion, Building and Casting  **********/
 /********** ============================================ **********/
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 46da9d33639b6..86e73ed03f187 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2019,9 +2019,7 @@ define float @v_fneg_minimumnum_f32_no_ieee(float %a, float %b) #4 {
 ; GCN-LABEL: v_fneg_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %b)
   %fneg = fneg float %min
@@ -2044,8 +2042,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v0
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %a)
   %min.fneg = fneg float %min
@@ -2068,8 +2065,7 @@ define float @v_fneg_posk_minimumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_posk_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, -4.0, v0
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -4.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float 4.0, float %a)
   %fneg = fneg float %min
@@ -2092,8 +2088,7 @@ define float @v_fneg_negk_minimumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_negk_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, 4.0, v0
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, 4.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float -4.0, float %a)
   %fneg = fneg float %min
@@ -2251,8 +2246,7 @@ define float @v_fneg_neg0_minimumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_neg0_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, 0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float -0.0, float %a)
   %fneg = fneg float %min
@@ -2299,7 +2293,6 @@ define float @v_fneg_0_minimumnum_foldable_use_f32_no_ieee(float %a, float %b) #
 ; GCN-LABEL: v_fneg_0_minimumnum_foldable_use_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -2330,9 +2323,7 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
 ; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2364,9 +2355,7 @@ define float @v_fneg_maximumnum_f32_no_ieee(float %a, float %b) #4 {
 ; GCN-LABEL: v_fneg_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %b)
   %fneg = fneg float %max
@@ -2389,8 +2378,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v0
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %a)
   %max.fneg = fneg float %max
@@ -2413,8 +2401,7 @@ define float @v_fneg_posk_maximumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_posk_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, -4.0, v0
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -4.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float 4.0, float %a)
   %fneg = fneg float %max
@@ -2437,8 +2424,7 @@ define float @v_fneg_negk_maximumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_negk_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, 4.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float -4.0, float %a)
   %fneg = fneg float %max
@@ -2473,8 +2459,7 @@ define float @v_fneg_neg0_maximumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_neg0_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, 0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float -0.0, float %a)
   %fneg = fneg float %max
@@ -2499,7 +2484,6 @@ define float @v_fneg_0_maximumnum_foldable_use_f32_no_ieee(float %a, float %b) #
 ; GCN-LABEL: v_fneg_0_maximumnum_foldable_use_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
 ; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -2530,9 +2514,7 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
 ; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %b)
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index c45d86ce306e7..4f73e8e9c1883 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -3414,8 +3414,8 @@ define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v2, v3
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
@@ -3652,57 +3652,57 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v3, v3
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v4, v5
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v2
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-SDAG-LABEL: v_maximumnum_v3f16:
-; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v1, v1, v2
-; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX950-SDAG-LABEL: v_maximumnum_v3f16:
-; GFX950-SDAG:       ; %bb.0:
-; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX950-SDAG-NEXT:    s_nop 0
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v1, v1, v2
-; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v3f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
@@ -3712,8 +3712,8 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
@@ -3722,11 +3722,11 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX11-SDAG-LABEL: v_maximumnum_v3f16:
 ; GFX11-SDAG:       ; %bb.0:
 ; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3735,10 +3735,10 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3750,11 +3750,11 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v0, v0, v2
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v1, v1, v3
 ; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3767,10 +3767,10 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v3
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3834,12 +3834,19 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_v3f16_nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximumnum_v3f16_nnan:
 ; GFX10:       ; %bb.0:
@@ -3939,16 +3946,16 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v2, v2
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v2, v2
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v3, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v4, v6
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v4, v5
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v5, v7
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v2, v5
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v2, v1
@@ -3965,16 +3972,16 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_maximumnum_v4f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_maximumnum_v4f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -3988,6 +3995,18 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_max_f16 v1, v1, v2
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v2
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-SDAG-LABEL: v_maximumnum_v4f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4003,8 +4022,8 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
@@ -4026,10 +4045,10 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4058,10 +4077,10 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v3
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4268,22 +4287,22 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v3, v3
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v2, v2
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v3, v3
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v4, v4
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v5, v5
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v6, v9
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v6, v7
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v7, v10
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v4, v4
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v3, v7
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v8, v11
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v5, v5
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v4, v7
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v6, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
@@ -4304,19 +4323,19 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_max_f16 v2, v2, v3
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_maximumnum_v6f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v5
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v4, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v3
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_maximumnum_v6f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -4333,6 +4352,21 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_max_f16 v2, v2, v3
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v4, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v3
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-SDAG-LABEL: v_maximumnum_v6f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4351,10 +4385,10 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v4
@@ -4380,15 +4414,14 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v4
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v5
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4419,15 +4452,14 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v3
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v4
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v5
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %result = call <6 x half> @llvm.maximumnum.v6f16(<6 x half> %x, <6 x half> %y)
@@ -4554,28 +4586,28 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v4, v4
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v2, v2
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v3, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v4, v4
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v5, v5
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v14, v6, v6
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v15, v7, v7
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v8, v12
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v8, v9
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v9, v13
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v5, v5
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v4, v9
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v10, v14
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v6, v6
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v5, v9
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v11, v15
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v7, v7
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v6, v9
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v8, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v4, v1
@@ -4600,22 +4632,22 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_max_f16 v3, v3, v4
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_maximumnum_v8f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v6
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v7
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v5, v5
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v6, v6
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v7, v7
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v4
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_maximumnum_v8f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -4635,6 +4667,24 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_max_f16 v3, v3, v4
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v5, v5
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v6, v6
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v7, v7
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v4
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-SDAG-LABEL: v_maximumnum_v8f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4656,12 +4706,12 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v4
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v5
@@ -4691,18 +4741,17 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v4
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v6
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v7
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4736,18 +4785,17 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v4
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v5
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v6
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v7
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %result = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %x, <8 x half> %y)
@@ -4978,52 +5026,52 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v16, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v8, v8
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v19, v8, v8
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v16, v16, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v9, v9
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v9, v9
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v2, v2
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v8, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v10, v10
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v10, v10
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v16, v16, v19
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v19, v3, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v9, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v11, v11
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v11, v11
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v17, v8
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v4, v4
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v10, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v12, v12
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v4, v4
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v12, v12
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v18, v9
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v5, v5
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v11, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v13, v13
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v5, v5
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v13, v13
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v19, v10
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v19, v6, v6
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v12, v17
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v6, v6
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v14, v14
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v17, v11
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v7, v7
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v18, v12
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v14, v14
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v15, v15
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v19, v18
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v13, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v17, v13
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v14, v7, v7
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v15, v15
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v14, v14, v17
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v16, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v8, v1
@@ -5031,8 +5079,8 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v3, v10, v3
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v4, v11, v4
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v5, v12, v5
-; GFX8-GISEL-NEXT:    v_or_b32_e32 v6, v18, v6
-; GFX8-GISEL-NEXT:    v_or_b32_e32 v7, v13, v7
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v6, v13, v6
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v7, v14, v7
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_maximumnum_v16f16:
@@ -5064,34 +5112,34 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_max_f16 v7, v7, v8
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_maximumnum_v16f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v9
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v10
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v11
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v12
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v13
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v6, v6, v14
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v7, v7, v15
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v9, v9
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v10, v10
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v12, v12
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v4, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v13, v13
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v5, v5, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v14, v14
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v15, v15
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v7, v7, v8
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_maximumnum_v16f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -5123,15 +5171,45 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_max_f16 v7, v7, v8
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: v_maximumnum_v16f16:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v9, v9
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v10, v10
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v12, v12
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v13, v13
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v14, v14
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v15, v15
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v8
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v8, v8, v8
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v9, v9, v9
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v10, v10, v10
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v8
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v8, v11, v11
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v9
@@ -5156,29 +5234,29 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v10
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v12, v12
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v13, v13
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v14, v14
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v9
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v10
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v11
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v12
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v13
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v14
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v15
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v15, v15
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v8
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v9
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v10
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v11
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v12
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-LABEL: v_maximumnum_v16f16:
@@ -5214,29 +5292,29 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v10
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v12, v12
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v13, v13
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v14, v14
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v8
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v9
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v10
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v11
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v12
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v13
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v14
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v15
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v15, v15
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v8
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v9
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v10
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v11
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v12
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_maximumnum_v16f16:
@@ -5280,29 +5358,29 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v8
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v9
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v11, v11
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v10
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v12, v12
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v13, v13
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v14, v14
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v11, v11
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v12, v12
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v13, v13, v13
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v14, v14, v14
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v15, v15, v15
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v8
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v9
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v10
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v11
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v12
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v13
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v14
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v15
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v15, v15
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v8
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v9
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v10
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v11
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v12
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %result = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %x, <16 x half> %y)
   ret <16 x half> %result
@@ -6174,34 +6252,34 @@ define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v16, v16, v16
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v17, v17, v17
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v18, v18, v18
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v19, v19, v19
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v20, v20, v20
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v21, v21, v21
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v22, v22, v22
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v23, v23, v23
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v24, v24, v24
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v25, v25, v25
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v26, v26, v26
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v27, v27, v27
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v28, v28, v28
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v29, v29, v29
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v30, v30, v30
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v16
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v17
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v18
@@ -6285,34 +6363,34 @@ define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v16, v16, v16
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v17, v17, v17
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v18, v18, v18
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v19, v19, v19
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v20, v20, v20
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v21, v21, v21
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v22, v22, v22
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v23, v23, v23
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v24, v24, v24
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v25, v25, v25
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v26, v26, v26
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v27, v27, v27
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v28, v28, v28
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v29, v29, v29
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v30, v30, v30
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v16
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v17
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v18
@@ -6396,34 +6474,34 @@ define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v16, v16, v16
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v17, v17, v17
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v18, v18, v18
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v19, v19, v19
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v20, v20, v20
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v21, v21, v21
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v22, v22, v22
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v23, v23, v23
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v24, v24, v24
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v25, v25, v25
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v26, v26, v26
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v27, v27, v27
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v28, v28, v28
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v29, v29, v29
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v30, v30, v30
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v16
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v17
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v18
@@ -6516,34 +6594,34 @@ define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v16, v16, v16
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v11, v11
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v12, v12
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v13, v13, v13
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v14, v14, v14
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v15, v15, v15
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v17, v17, v17
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v18, v18, v18
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v19, v19, v19
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v20, v20, v20
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v21, v21, v21
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v22, v22, v22
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v23, v23, v23
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v24, v24, v24
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v25, v25, v25
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v26, v26, v26
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v11, v11
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v27, v27, v27
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v12, v12
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v28, v28, v28
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v13, v13, v13
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v29, v29, v29
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v14, v14, v14
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v30, v30, v30
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v15, v15, v15
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v16
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v17
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v18
@@ -6584,11 +6662,11 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; GFX7-GISEL-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v3
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: v_maximumnum_v2f32:
@@ -6606,11 +6684,11 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; GFX8-GISEL-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v3
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-SDAG-LABEL: v_maximumnum_v2f32:
@@ -6624,29 +6702,16 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-GISEL-LABEL: v_maximumnum_v2f32:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_maximumnum_v2f32:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v2f32:
 ; GFX10-SDAG:       ; %bb.0:
@@ -6663,8 +6728,8 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
@@ -6784,14 +6849,14 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
 ; GFX7-GISEL-NEXT:    v_max_f32_e32 v0, v0, v3
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v5
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: v_maximumnum_v3f32:
@@ -6812,14 +6877,14 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
 ; GFX8-GISEL-NEXT:    v_max_f32_e32 v0, v0, v3
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v5
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-SDAG-LABEL: v_maximumnum_v3f32:
@@ -6836,40 +6901,19 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX9-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-GISEL-LABEL: v_maximumnum_v3f32:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v3
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_maximumnum_v3f32:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_mov_b32_e32 v6, v3
-; GFX950-GISEL-NEXT:    v_mov_b32_e32 v7, v4
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_mov_b32_e32 v4, v5
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v6, v6
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v7, v7
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v4, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v4, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v3f32:
 ; GFX10-SDAG:       ; %bb.0:
@@ -6889,10 +6933,10 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v3
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
@@ -6913,10 +6957,10 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX11-GISEL-LABEL: v_maximumnum_v3f32:
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4
 ; GFX11-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -6943,10 +6987,10 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
 ; GFX12-GISEL-NEXT:    v_max_num_f32_e32 v2, v2, v5
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7030,17 +7074,17 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v5
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v6
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v6, 1.0, v6
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v7, 1.0, v7
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v1, v1, v5
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v3, v3, v7
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; GFX7-GISEL-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: v_maximumnum_v4f32:
@@ -7064,17 +7108,17 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v5
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v6
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v6, 1.0, v6
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v7, 1.0, v7
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v1, v1, v5
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v3, v3, v7
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; GFX8-GISEL-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-SDAG-LABEL: v_maximumnum_v4f32:
@@ -7094,43 +7138,22 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX9-SDAG-NEXT:    v_max_f32_e32 v3, v3, v4
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-GISEL-LABEL: v_maximumnum_v4f32:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v6, v6, v6
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v7, v7, v7
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v5
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v6
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v7
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_maximumnum_v4f32:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v5, v5
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v6, v6
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v7, v7
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v3, v4
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v5, v5
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v6, v6
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v7, v7
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v3, v4
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v4f32:
 ; GFX10-SDAG:       ; %bb.0:
@@ -7153,12 +7176,12 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v6, v6, v6
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v7, v7, v7
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v4
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v5
@@ -7182,10 +7205,10 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7214,10 +7237,10 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7365,11 +7388,11 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
 ; GFX950-GISEL:       ; %bb.0:
 ; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v2f64:
@@ -7606,14 +7629,14 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
 ; GFX950-GISEL:       ; %bb.0:
 ; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX950-GISEL-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[6:7]
 ; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v3f64:
@@ -7895,17 +7918,17 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
 ; GFX950-GISEL:       ; %bb.0:
 ; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[8:9]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[12:13], v[12:13]
+; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[8:9]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX950-GISEL-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX950-GISEL-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX950-GISEL-NEXT:    v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[14:15], v[14:15]
+; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[8:9]
 ; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_maximumnum_v4f64:
@@ -8091,10 +8114,10 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 {
 ; GFX7-SDAG-LABEL: v_maximumnum_f16_no_ieee:
 ; GFX7-SDAG:       ; %bb.0:
 ; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -8107,89 +8130,35 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 {
 ; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-SDAG-LABEL: v_maximumnum_f16_no_ieee:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: v_maximumnum_f16_no_ieee:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_no_ieee:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_no_ieee:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_maximumnum_f16_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_maximumnum_f16_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: v_maximumnum_f16_no_ieee:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
-; GFX11-TRUE16-SDAG:       ; %bb.0:
-; GFX11-TRUE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-SDAG-NEXT:    v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-SDAG-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-SDAG-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_maximumnum_f16_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
-; GFX11-TRUE16-GISEL:       ; %bb.0:
-; GFX11-TRUE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-GISEL-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-GISEL-NEXT:    v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-GISEL-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_maximumnum_f16_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
-; GFX11-FAKE16-SDAG:       ; %bb.0:
-; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximumnum_f16_no_ieee:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
-; GFX11-FAKE16-GISEL:       ; %bb.0:
-; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-GISEL-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX11-FAKE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: v_maximumnum_f16_no_ieee:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
 ; GFX12-TRUE16-SDAG:       ; %bb.0:
@@ -8320,85 +8289,35 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 {
 }
 
 define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 {
-; GFX7-SDAG-LABEL: v_maximumnum_f32_no_ieee:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-SDAG-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: v_maximumnum_f32_no_ieee:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-GISEL-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: v_maximumnum_f32_no_ieee:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-SDAG-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: v_maximumnum_f32_no_ieee:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-GISEL-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_no_ieee:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_no_ieee:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: v_maximumnum_f32_no_ieee:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: v_maximumnum_f32_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: v_maximumnum_f32_no_ieee:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-GISEL-LABEL: v_maximumnum_f32_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_maximumnum_f32_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_maximumnum_f32_no_ieee:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_maximumnum_f32_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: v_maximumnum_f32_no_ieee:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_max_f32_e32 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_maximumnum_f32_no_ieee:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_maximumnum_f32_no_ieee:
 ; GFX12-SDAG:       ; %bb.0:
@@ -8472,87 +8391,35 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
 }
 
 define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 {
-; GFX7-SDAG-LABEL: v_maximumnum_f64_no_ieee:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: v_maximumnum_f64_no_ieee:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: v_maximumnum_f64_no_ieee:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: v_maximumnum_f64_no_ieee:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: v_maximumnum_f64_no_ieee:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f64_no_ieee:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: v_maximumnum_f64_no_ieee:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: v_maximumnum_f64_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: v_maximumnum_f64_no_ieee:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-GISEL-LABEL: v_maximumnum_f64_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_maximumnum_f64_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_maximumnum_f64_no_ieee:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_maximumnum_f64_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: v_maximumnum_f64_no_ieee:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_maximumnum_f64_no_ieee:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_maximumnum_f64_no_ieee:
 ; GFX12-SDAG:       ; %bb.0:
@@ -8631,14 +8498,14 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
 ; GFX7-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
 ; GFX7-SDAG:       ; %bb.0:
 ; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-SDAG-NEXT:    v_max_f32_e32 v0, v0, v2
 ; GFX7-SDAG-NEXT:    v_max_f32_e32 v1, v1, v3
 ; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -8659,11 +8526,7 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
 ; GFX8-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-SDAG-NEXT:    v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX8-SDAG-NEXT:    v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-SDAG-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -8671,82 +8534,28 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
 ; GFX8-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v2, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v0, v1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX950-SDAG:       ; %bb.0:
-; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX950-SDAG-NEXT:    s_nop 0
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-GISEL-NEXT:    s_nop 0
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_max_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
 ; GFX12-SDAG:       ; %bb.0:
@@ -8909,12 +8718,19 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v2
+; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
index 5cb051d2ab857..558006d2b6957 100644
--- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
@@ -3239,8 +3239,8 @@ define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_min_f16_e32 v2, v2, v3
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
@@ -3477,57 +3477,57 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v3, v3
 ; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v4, v5
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v3, v3
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v1, v1, v2
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-SDAG-LABEL: v_minimumnum_v3f16:
-; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX900-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX900-SDAG-NEXT:    v_pk_min_f16 v1, v1, v2
-; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX9-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX950-SDAG-LABEL: v_minimumnum_v3f16:
-; GFX950-SDAG:       ; %bb.0:
-; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v2, v3, v3
-; GFX950-SDAG-NEXT:    s_nop 0
-; GFX950-SDAG-NEXT:    v_pk_min_f16 v1, v1, v2
-; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v3f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX10-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
@@ -3537,8 +3537,8 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
@@ -3547,11 +3547,11 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX11-SDAG-LABEL: v_minimumnum_v3f16:
 ; GFX11-SDAG:       ; %bb.0:
 ; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX11-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3560,10 +3560,10 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3575,11 +3575,11 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-SDAG-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_pk_min_num_f16 v0, v0, v2
 ; GFX12-SDAG-NEXT:    v_pk_min_num_f16 v1, v1, v3
 ; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3592,10 +3592,10 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v2
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v3
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3659,12 +3659,19 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimumnum_v3f16_nnan:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimumnum_v3f16_nnan:
 ; GFX10:       ; %bb.0:
@@ -3764,16 +3771,16 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v2, v2
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v2, v2
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v3, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v4, v6
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v4, v5
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v2, v5, v7
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v2, v2, v5
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v2, v1
@@ -3790,16 +3797,16 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_minimumnum_v4f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_minimumnum_v4f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -3813,6 +3820,18 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_min_f16 v1, v1, v2
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v3, v3
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v1, v1, v2
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-SDAG-LABEL: v_minimumnum_v4f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3828,8 +3847,8 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
@@ -3851,10 +3870,10 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3883,10 +3902,10 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v2
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v3
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4093,22 +4112,22 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v3, v3
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v2, v2
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v3, v3
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v4, v4
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v5, v5
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v6, v6, v9
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v6, v6, v7
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v7, v10
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v4, v4
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v3, v3, v7
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v8, v11
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v7, v5, v5
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v4, v7
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v6, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
@@ -4129,19 +4148,19 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_min_f16 v2, v2, v3
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_minimumnum_v6f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v3
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v4
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v2, v2, v5
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v4, v4
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v2, v2, v3
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_minimumnum_v6f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -4158,6 +4177,21 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_min_f16 v2, v2, v3
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v4, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v5, v5
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v2, v2, v3
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-SDAG-LABEL: v_minimumnum_v6f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4176,10 +4210,10 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v3
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v4
@@ -4205,15 +4239,14 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v4
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v2, v2, v5
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4244,15 +4277,14 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v3
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v4
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v2, v2, v5
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %result = call <6 x half> @llvm.minimumnum.v6f16(<6 x half> %x, <6 x half> %y)
@@ -4379,28 +4411,28 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v4, v4
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v2, v2
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v3, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v4, v4
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v5, v5
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v14, v6, v6
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v15, v7, v7
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v8, v8, v12
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v8, v8, v9
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v9, v13
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v4, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v5, v5
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v4, v4, v9
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v5, v10, v14
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v5, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v6, v6
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v5, v5, v9
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v6, v11, v15
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v6, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v7, v7
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v6, v6, v9
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v8, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v4, v1
@@ -4425,22 +4457,22 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_min_f16 v3, v3, v4
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_minimumnum_v8f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v4
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v2, v2, v6
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v3, v3, v7
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v5, v5
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v1, v1, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v6, v6
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v7, v7
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v3, v3, v4
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_minimumnum_v8f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -4460,6 +4492,24 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_min_f16 v3, v3, v4
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX950-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v5, v5
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v1, v1, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v6, v6
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v2, v2, v4
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v7, v7
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v3, v3, v4
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-SDAG-LABEL: v_minimumnum_v8f16:
 ; GFX10-SDAG:       ; %bb.0:
 ; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4481,12 +4531,12 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v4
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v5
@@ -4516,18 +4566,17 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v4
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v2, v2, v6
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v3, v3, v7
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4561,18 +4610,17 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v4
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v5
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v2, v2, v6
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v3, v3, v7
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %result = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %x, <8 x half> %y)
@@ -4803,52 +4851,52 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_max_f16_e32 v16, v0, v0
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v8, v8
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v19, v8, v8
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v16, v16, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v9, v9
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v8, v1, v1
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v9, v9
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v2, v2
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v8, v8, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v10, v10
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v9, v2, v2
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v10, v10
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v16, v16, v19
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v19, v3, v3
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v9, v9, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v11, v11
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v10, v3, v3
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v11, v11
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v8, v17, v8
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v4, v4
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v10, v10, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v12, v12
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v11, v4, v4
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v12, v12
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v9, v18, v9
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v5, v5
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v11, v11, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v13, v13
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v12, v5, v5
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v13, v13
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v10, v19, v10
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v19, v6, v6
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v12, v12, v17
+; GFX8-GISEL-NEXT:    v_min_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v6, v6
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v14, v14
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v11, v17, v11
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v7, v7
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v12, v18, v12
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v18, v14, v14
 ; GFX8-GISEL-NEXT:    v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v13, v15, v15
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v18, v19, v18
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v13, v13, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v13, v17, v13
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v14, v7, v7
+; GFX8-GISEL-NEXT:    v_max_f16_e32 v17, v15, v15
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v14, v14, v17
 ; GFX8-GISEL-NEXT:    v_min_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v16, v0
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v8, v1
@@ -4856,8 +4904,8 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v3, v10, v3
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v4, v11, v4
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v5, v12, v5
-; GFX8-GISEL-NEXT:    v_or_b32_e32 v6, v18, v6
-; GFX8-GISEL-NEXT:    v_or_b32_e32 v7, v13, v7
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v6, v13, v6
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v7, v14, v7
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_minimumnum_v16f16:
@@ -4889,34 +4937,34 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX900-SDAG-NEXT:    v_pk_min_f16 v7, v7, v8
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-GISEL-LABEL: v_minimumnum_v16f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX9-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v9
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v2, v2, v10
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v3, v3, v11
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v4, v4, v12
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v5, v5, v13
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v6, v6, v14
-; GFX9-GISEL-NEXT:    v_pk_min_f16 v7, v7, v15
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v9, v9
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v1, v1, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v10, v10
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v2, v2, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v3, v3, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v12, v12
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v4, v4, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v13, v13
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v5, v5, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v14, v14
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
+; GFX900-GISEL-NEXT:    v_pk_max_f16 v8, v15, v15
+; GFX900-GISEL-NEXT:    v_pk_min_f16 v7, v7, v8
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-SDAG-LABEL: v_minimumnum_v16f16:
 ; GFX950-SDAG:       ; %bb.0:
@@ -4948,15 +4996,45 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX950-SDAG-NEXT:    v_pk_min_f16 v7, v7, v8
 ; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: v_minimumnum_v16f16:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX950-GISEL:       ; %bb.0:
+; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v9, v9
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v1, v1, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v10, v10
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v2, v2, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v3, v3, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v12, v12
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v4, v4, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v13, v13
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v5, v5, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v14, v14
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v6, v6, v8
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v15, v15
+; GFX950-GISEL-NEXT:    s_nop 0
+; GFX950-GISEL-NEXT:    v_pk_min_f16 v7, v7, v8
+; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v8, v8, v8
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v9, v9, v9
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v10, v10, v10
+; GFX10-SDAG-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-SDAG-NEXT:    v_pk_min_f16 v0, v0, v8
 ; GFX10-SDAG-NEXT:    v_pk_max_f16 v8, v11, v11
 ; GFX10-SDAG-NEXT:    v_pk_min_f16 v1, v1, v9
@@ -4981,29 +5059,29 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v2, v2, v10
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v12, v12
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v13, v13
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v14, v14
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v9
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v2, v2, v10
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v3, v3, v11
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v4, v4, v12
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v5, v5, v13
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v6, v6, v14
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v7, v7, v15
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v15, v15
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v3, v3, v8
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v4, v4, v9
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v5, v5, v10
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v6, v6, v11
+; GFX10-GISEL-NEXT:    v_pk_min_f16 v7, v7, v12
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-LABEL: v_minimumnum_v16f16:
@@ -5039,29 +5117,29 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v11, v11
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v2, v2, v10
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v12, v12
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v13, v13
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v14, v14
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v8
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v9
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v2, v2, v10
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v3, v3, v11
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v4, v4, v12
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v5, v5, v13
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v6, v6, v14
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v7, v7, v15
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v15, v15
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v3, v3, v8
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v4, v4, v9
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v5, v5, v10
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v6, v6, v11
+; GFX11-GISEL-NEXT:    v_pk_min_f16 v7, v7, v12
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_minimumnum_v16f16:
@@ -5105,29 +5183,29 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v8
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v9
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v11, v11
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v2, v2, v10
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v12, v12
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v13, v13
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v14, v14
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v11, v11
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v12, v12
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v13, v13, v13
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v14, v14, v14
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v15, v15, v15
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v8
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v9
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v2, v2, v10
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v3, v3, v11
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v4, v4, v12
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v5, v5, v13
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v6, v6, v14
-; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v7, v7, v15
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v15, v15
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v3, v3, v8
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v4, v4, v9
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v5, v5, v10
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v6, v6, v11
+; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v7, v7, v12
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x half> %y)
   ret <16 x half> %result
@@ -5999,34 +6077,34 @@ define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v16, v16, v16
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v17, v17, v17
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v18, v18, v18
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v19, v19, v19
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v20, v20, v20
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v21, v21, v21
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v22, v22, v22
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v23, v23, v23
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v24, v24, v24
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v25, v25, v25
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v26, v26, v26
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v27, v27, v27
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v28, v28, v28
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v29, v29, v29
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
 ; GFX950-GISEL-NEXT:    v_pk_max_f16 v30, v30, v30
+; GFX950-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v16
 ; GFX950-GISEL-NEXT:    v_pk_min_f16 v1, v1, v17
 ; GFX950-GISEL-NEXT:    v_pk_min_f16 v2, v2, v18
@@ -6110,34 +6188,34 @@ define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v16, v16, v16
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v17, v17, v17
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v18, v18, v18
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v19, v19, v19
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v20, v20, v20
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v21, v21, v21
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v22, v22, v22
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v23, v23, v23
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v24, v24, v24
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v25, v25, v25
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v26, v26, v26
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v27, v27, v27
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v28, v28, v28
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v29, v29, v29
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
 ; GFX10-GISEL-NEXT:    v_pk_max_f16 v30, v30, v30
+; GFX10-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v16
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v1, v1, v17
 ; GFX10-GISEL-NEXT:    v_pk_min_f16 v2, v2, v18
@@ -6221,34 +6299,34 @@ define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v16, v16, v16
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v17, v17, v17
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v18, v18, v18
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v19, v19, v19
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v4, v4, v4
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v20, v20, v20
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v5, v5, v5
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v21, v21, v21
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v6, v6, v6
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v22, v22, v22
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v7, v7, v7
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v23, v23, v23
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v8, v8, v8
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v24, v24, v24
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v9, v9, v9
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v25, v25, v25
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v10, v10, v10
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v26, v26, v26
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v11, v11, v11
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v27, v27, v27
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v12, v12, v12
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v28, v28, v28
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v13, v13, v13
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v29, v29, v29
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v14, v14, v14
 ; GFX11-GISEL-NEXT:    v_pk_max_f16 v30, v30, v30
+; GFX11-GISEL-NEXT:    v_pk_max_f16 v15, v15, v15
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v16
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v1, v1, v17
 ; GFX11-GISEL-NEXT:    v_pk_min_f16 v2, v2, v18
@@ -6341,34 +6419,34 @@ define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v0, v0, v0
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v16, v16, v16
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v11, v11
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v12, v12
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v13, v13, v13
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v14, v14, v14
-; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v15, v15, v15
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v17, v17, v17
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v2, v2, v2
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v18, v18, v18
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v19, v19, v19
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v4, v4, v4
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v20, v20, v20
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v5, v5, v5
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v21, v21, v21
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v6, v6, v6
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v22, v22, v22
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v7, v7, v7
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v23, v23, v23
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v8, v8, v8
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v24, v24, v24
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v9, v9, v9
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v25, v25, v25
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v10, v10, v10
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v26, v26, v26
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v11, v11, v11
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v27, v27, v27
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v12, v12, v12
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v28, v28, v28
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v13, v13, v13
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v29, v29, v29
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v14, v14, v14
 ; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v30, v30, v30
+; GFX12-GISEL-NEXT:    v_pk_max_num_f16 v15, v15, v15
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v0, v0, v16
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v1, v1, v17
 ; GFX12-GISEL-NEXT:    v_pk_min_num_f16 v2, v2, v18
@@ -6409,11 +6487,11 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; GFX7-GISEL-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v3
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: v_minimumnum_v2f32:
@@ -6431,11 +6509,11 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; GFX8-GISEL-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v3
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-SDAG-LABEL: v_minimumnum_v2f32:
@@ -6449,29 +6527,16 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-GISEL-LABEL: v_minimumnum_v2f32:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_minimumnum_v2f32:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v0, v0, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v0, v0, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v2f32:
 ; GFX10-SDAG:       ; %bb.0:
@@ -6488,8 +6553,8 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_min_f32_e32 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
@@ -6609,14 +6674,14 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
 ; GFX7-GISEL-NEXT:    v_min_f32_e32 v0, v0, v3
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v2, v2, v5
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v5
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: v_minimumnum_v3f32:
@@ -6637,14 +6702,14 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
 ; GFX8-GISEL-NEXT:    v_min_f32_e32 v0, v0, v3
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v2, v2, v5
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v5
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-SDAG-LABEL: v_minimumnum_v3f32:
@@ -6661,40 +6726,19 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX9-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-GISEL-LABEL: v_minimumnum_v3f32:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v0, v0, v3
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v2, v2, v5
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_minimumnum_v3f32:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_mov_b32_e32 v6, v3
-; GFX950-GISEL-NEXT:    v_mov_b32_e32 v7, v4
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_mov_b32_e32 v4, v5
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v6, v6
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v0, v0, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v7, v7
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v4, v4
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v0, v0, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v4, v4
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v5, v5
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v3f32:
 ; GFX10-SDAG:       ; %bb.0:
@@ -6714,10 +6758,10 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
 ; GFX10-GISEL-NEXT:    v_min_f32_e32 v0, v0, v3
 ; GFX10-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
@@ -6738,10 +6782,10 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX11-GISEL-LABEL: v_minimumnum_v3f32:
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v3, v3, v3
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v4, v4, v4
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4
 ; GFX11-GISEL-NEXT:    v_min_f32_e32 v2, v2, v5
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -6768,10 +6812,10 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
 ; GFX12-GISEL-NEXT:    v_min_num_f32_e32 v2, v2, v5
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -6855,17 +6899,17 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v5
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v6
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
 ; GFX7-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v6, 1.0, v6
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v7, 1.0, v7
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v1, v1, v5
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v3, v3, v7
+; GFX7-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; GFX7-GISEL-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: v_minimumnum_v4f32:
@@ -6889,17 +6933,17 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v5
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v6
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
 ; GFX8-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v4
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v5
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v6, 1.0, v6
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v7, 1.0, v7
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v1, v1, v5
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v3, v3, v7
+; GFX8-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; GFX8-GISEL-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-SDAG-LABEL: v_minimumnum_v4f32:
@@ -6919,43 +6963,22 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX9-SDAG-NEXT:    v_min_f32_e32 v3, v3, v4
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-GISEL-LABEL: v_minimumnum_v4f32:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v6, v6, v6
-; GFX900-GISEL-NEXT:    v_max_f32_e32 v7, v7, v7
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v1, v1, v5
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v2, v2, v6
-; GFX900-GISEL-NEXT:    v_min_f32_e32 v3, v3, v7
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_minimumnum_v4f32:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_pk_mul_f32 v[6:7], 1.0, v[6:7] op_sel_hi:[0,1]
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v5, v5
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v6, v6
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
-; GFX950-GISEL-NEXT:    v_max_f32_e32 v4, v7, v7
-; GFX950-GISEL-NEXT:    v_min_f32_e32 v3, v3, v4
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v5, v5
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v6, v6
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v7, v7
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v3, v3, v4
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v4f32:
 ; GFX10-SDAG:       ; %bb.0:
@@ -6978,12 +7001,12 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v4, v4, v4
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v5, v5, v5
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v6, v6, v6
+; GFX10-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
 ; GFX10-GISEL-NEXT:    v_max_f32_e32 v7, v7, v7
 ; GFX10-GISEL-NEXT:    v_min_f32_e32 v0, v0, v4
 ; GFX10-GISEL-NEXT:    v_min_f32_e32 v1, v1, v5
@@ -7007,10 +7030,10 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
 ; GFX11-GISEL-NEXT:    v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5
 ; GFX11-GISEL-NEXT:    v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7039,10 +7062,10 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
 ; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
-; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
 ; GFX12-GISEL-NEXT:    v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
-; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX12-GISEL-NEXT:    v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
 ; GFX12-GISEL-NEXT:    v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
 ; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7190,11 +7213,11 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
 ; GFX950-GISEL:       ; %bb.0:
 ; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
 ; GFX950-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
+; GFX950-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], v[4:5]
 ; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v2f64:
@@ -7431,14 +7454,14 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
 ; GFX950-GISEL:       ; %bb.0:
 ; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX950-GISEL-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
 ; GFX950-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-GISEL-NEXT:    v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT:    v_min_f64 v[4:5], v[4:5], v[6:7]
 ; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v3f64:
@@ -7720,17 +7743,17 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
 ; GFX950-GISEL:       ; %bb.0:
 ; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[10:11], v[10:11]
+; GFX950-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], v[8:9]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[12:13], v[12:13]
+; GFX950-GISEL-NEXT:    v_min_f64 v[4:5], v[4:5], v[8:9]
 ; GFX950-GISEL-NEXT:    v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[8:9], v[8:9]
-; GFX950-GISEL-NEXT:    v_max_f64 v[10:11], v[10:11], v[10:11]
-; GFX950-GISEL-NEXT:    v_max_f64 v[12:13], v[12:13], v[12:13]
-; GFX950-GISEL-NEXT:    v_max_f64 v[14:15], v[14:15], v[14:15]
-; GFX950-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX950-GISEL-NEXT:    v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX950-GISEL-NEXT:    v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT:    v_max_f64 v[8:9], v[14:15], v[14:15]
+; GFX950-GISEL-NEXT:    v_min_f64 v[6:7], v[6:7], v[8:9]
 ; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-SDAG-LABEL: v_minimumnum_v4f64:
@@ -7916,10 +7939,10 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
 ; GFX7-SDAG-LABEL: v_minimumnum_f16_no_ieee:
 ; GFX7-SDAG:       ; %bb.0:
 ; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7932,89 +7955,35 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
 ; GFX7-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-SDAG-LABEL: v_minimumnum_f16_no_ieee:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX8-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: v_minimumnum_f16_no_ieee:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_no_ieee:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_no_ieee:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_minimumnum_f16_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_minimumnum_f16_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: v_minimumnum_f16_no_ieee:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
-; GFX11-TRUE16-SDAG:       ; %bb.0:
-; GFX11-TRUE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-SDAG-NEXT:    v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-SDAG-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-SDAG-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_minimumnum_f16_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
-; GFX11-TRUE16-GISEL:       ; %bb.0:
-; GFX11-TRUE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-GISEL-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-GISEL-NEXT:    v_max_f16_e32 v0.h, v1.l, v1.l
-; GFX11-TRUE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-GISEL-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
-; GFX11-TRUE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_minimumnum_f16_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
-; GFX11-FAKE16-SDAG:       ; %bb.0:
-; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimumnum_f16_no_ieee:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
-; GFX11-FAKE16-GISEL:       ; %bb.0:
-; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-FAKE16-GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX11-FAKE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-GISEL-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX11-FAKE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: v_minimumnum_f16_no_ieee:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
 ; GFX12-TRUE16-SDAG:       ; %bb.0:
@@ -8145,85 +8114,35 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
 }
 
 define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 {
-; GFX7-SDAG-LABEL: v_minimumnum_f32_no_ieee:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-SDAG-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: v_minimumnum_f32_no_ieee:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX7-GISEL-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: v_minimumnum_f32_no_ieee:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-SDAG-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: v_minimumnum_f32_no_ieee:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-GISEL-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GFX8-GISEL-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_no_ieee:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_no_ieee:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: v_minimumnum_f32_no_ieee:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: v_minimumnum_f32_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: v_minimumnum_f32_no_ieee:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-GISEL-LABEL: v_minimumnum_f32_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_minimumnum_f32_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_minimumnum_f32_no_ieee:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_minimumnum_f32_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: v_minimumnum_f32_no_ieee:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_min_f32_e32 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_minimumnum_f32_no_ieee:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_minimumnum_f32_no_ieee:
 ; GFX12-SDAG:       ; %bb.0:
@@ -8297,87 +8216,35 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
 }
 
 define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 {
-; GFX7-SDAG-LABEL: v_minimumnum_f64_no_ieee:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: v_minimumnum_f64_no_ieee:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX7-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX7-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-SDAG-LABEL: v_minimumnum_f64_no_ieee:
-; GFX8-SDAG:       ; %bb.0:
-; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-GISEL-LABEL: v_minimumnum_f64_no_ieee:
-; GFX8-GISEL:       ; %bb.0:
-; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX8-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX8-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: v_minimumnum_f64_no_ieee:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f64_no_ieee:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: v_minimumnum_f64_no_ieee:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: v_minimumnum_f64_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-LABEL: v_minimumnum_f64_no_ieee:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-GISEL-LABEL: v_minimumnum_f64_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX10-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX10-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_minimumnum_f64_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_minimumnum_f64_no_ieee:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_minimumnum_f64_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: v_minimumnum_f64_no_ieee:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX11-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_minimumnum_f64_no_ieee:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_minimumnum_f64_no_ieee:
 ; GFX12-SDAG:       ; %bb.0:
@@ -8456,14 +8323,14 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
 ; GFX7-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
 ; GFX7-SDAG:       ; %bb.0:
 ; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-SDAG-NEXT:    v_min_f32_e32 v0, v0, v2
 ; GFX7-SDAG-NEXT:    v_min_f32_e32 v1, v1, v3
 ; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -8484,11 +8351,7 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
 ; GFX8-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
 ; GFX8-SDAG:       ; %bb.0:
 ; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT:    v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-SDAG-NEXT:    v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX8-SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX8-SDAG-NEXT:    v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-SDAG-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -8496,82 +8359,28 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
 ; GFX8-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
 ; GFX8-GISEL:       ; %bb.0:
 ; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v2, v0, v0
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_max_f16_e32 v3, v1, v1
-; GFX8-GISEL-NEXT:    v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT:    v_min_f16_e32 v2, v2, v3
-; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_min_f16_e32 v2, v0, v1
+; GFX8-GISEL-NEXT:    v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX900-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX900-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX900-GISEL:       ; %bb.0:
-; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX900-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX900-GISEL-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX950-SDAG:       ; %bb.0:
-; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX950-SDAG-NEXT:    s_nop 0
-; GFX950-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX950-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX950-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX950-GISEL:       ; %bb.0:
-; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX950-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX950-GISEL-NEXT:    s_nop 0
-; GFX950-GISEL-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX950-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX10-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX10-GISEL-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-SDAG-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v0, v0, v0
-; GFX11-GISEL-NEXT:    v_pk_max_f16 v1, v1, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_pk_min_f16 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_min_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
 ; GFX12-SDAG:       ; %bb.0:
@@ -8734,12 +8543,19 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
 ; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
 ; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT:    v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX9-SDAG-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_pk_min_f16 v0, v0, v2
+; GFX9-GISEL-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
 ; GFX10:       ; %bb.0:



More information about the llvm-commits mailing list