[llvm] Revert add GenericFloatingPointPredicateUtils #140254 (PR #141257)

Tim Gymnich via llvm-commits llvm-commits at lists.llvm.org
Fri May 23 10:02:25 PDT 2025


https://github.com/tgymnich created https://github.com/llvm/llvm-project/pull/141257

None

>From 9247a2c96393b1cca3fcf3d5c48b48e6890c1e5c Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 14:47:18 +0000
Subject: [PATCH 1/5] guard against non-virtual registers

---
 llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 67b1a449f8483..f05a291defff6 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -693,6 +693,9 @@ static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
 void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
                                              FPClassTest InterestedClasses,
                                              unsigned Depth) {
+  if (!R.isVirtual())
+    return;
+  
   LLT Ty = MRI.getType(R);
   APInt DemandedElts =
       Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
@@ -736,6 +739,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
 
   assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
 
+  if (!R.isVirtual())
+    return;
+
   MachineInstr &MI = *MRI.getVRegDef(R);
   unsigned Opcode = MI.getOpcode();
   LLT DstTy = MRI.getType(R);

>From 3669f1f8d920311d2dab86e2e89da0bbb841a4b3 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:33:06 +0000
Subject: [PATCH 2/5] replace isKnownNeverNaN impl

---
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h  |  6 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  4 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  4 +-
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         | 88 ++-----------------
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  2 +-
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   | 10 +--
 6 files changed, 20 insertions(+), 94 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 684a9bf554fb1..503f61216d9e6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -336,12 +336,12 @@ bool isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
 
 /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
 /// this returns if \p Val can be assumed to never be a signaling NaN.
-bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *ValueTracking,
                      bool SNaN = false);
 
 /// Returns true if \p Val can be assumed to never be a signaling NaN.
-inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
-  return isKnownNeverNaN(Val, MRI, true);
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *ValueTracking) {
+  return isKnownNeverNaN(Val, MRI, ValueTracking, true);
 }
 
 Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..8952226ae7f1e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6519,8 +6519,8 @@ unsigned CombinerHelper::getFPMinMaxOpcForSelect(
 CombinerHelper::SelectPatternNaNBehaviour
 CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
                                         bool IsOrderedComparison) const {
-  bool LHSSafe = isKnownNeverNaN(LHS, MRI);
-  bool RHSSafe = isKnownNeverNaN(RHS, MRI);
+  bool LHSSafe = isKnownNeverNaN(LHS, MRI, VT);
+  bool RHSSafe = isKnownNeverNaN(RHS, MRI, VT);
   // Completely unsafe.
   if (!LHSSafe && !RHSSafe)
     return SelectPatternNaNBehaviour::NOT_APPLICABLE;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7b18a98d7f3ca..e242df04a5d80 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8179,10 +8179,10 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
     // Note this must be done here, and not as an optimization combine in the
     // absence of a dedicate quiet-snan instruction as we're using an
     // omni-purpose G_FCANONICALIZE.
-    if (!isKnownNeverSNaN(Src0, MRI))
+    if (!isKnownNeverSNaN(Src0, MRI, VT))
       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
 
-    if (!isKnownNeverSNaN(Src1, MRI))
+    if (!isKnownNeverSNaN(Src1, MRI, VT))
       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
   }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 64af7a57e8d12..227fac4007463 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -12,6 +12,7 @@
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CodeGenCommonISel.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -806,88 +807,13 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
   return FoldedElements;
 }
 
-bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *VT,
                            bool SNaN) {
-  const MachineInstr *DefMI = MRI.getVRegDef(Val);
-  if (!DefMI)
-    return false;
-
-  const TargetMachine& TM = DefMI->getMF()->getTarget();
-  if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
-    return true;
-
-  // If the value is a constant, we can obviously see if it is a NaN or not.
-  if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
-    return !FPVal->getValueAPF().isNaN() ||
-           (SNaN && !FPVal->getValueAPF().isSignaling());
-  }
-
-  if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
-    for (const auto &Op : DefMI->uses())
-      if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
-        return false;
-    return true;
-  }
-
-  switch (DefMI->getOpcode()) {
-  default:
-    break;
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_FREM:
-  case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FCOS:
-  case TargetOpcode::G_FTAN:
-  case TargetOpcode::G_FACOS:
-  case TargetOpcode::G_FASIN:
-  case TargetOpcode::G_FATAN:
-  case TargetOpcode::G_FATAN2:
-  case TargetOpcode::G_FCOSH:
-  case TargetOpcode::G_FSINH:
-  case TargetOpcode::G_FTANH:
-  case TargetOpcode::G_FMA:
-  case TargetOpcode::G_FMAD:
-    if (SNaN)
-      return true;
-
-    // TODO: Need isKnownNeverInfinity
-    return false;
-  case TargetOpcode::G_FMINNUM_IEEE:
-  case TargetOpcode::G_FMAXNUM_IEEE: {
-    if (SNaN)
-      return true;
-    // This can return a NaN if either operand is an sNaN, or if both operands
-    // are NaN.
-    return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
-            isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
-           (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
-            isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
-  }
-  case TargetOpcode::G_FMINNUM:
-  case TargetOpcode::G_FMAXNUM: {
-    // Only one needs to be known not-nan, since it will be returned if the
-    // other ends up being one.
-    return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
-           isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
-  }
-  }
-
-  if (SNaN) {
-    // FP operations quiet. For now, just handle the ones inserted during
-    // legalization.
-    switch (DefMI->getOpcode()) {
-    case TargetOpcode::G_FPEXT:
-    case TargetOpcode::G_FPTRUNC:
-    case TargetOpcode::G_FCANONICALIZE:
-      return true;
-    default:
-      return false;
-    }
-  }
-
-  return false;
+  KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan);
+  if (SNaN)
+    return FPClass.isKnownNever(fcSNan);
+  
+  return FPClass.isKnownNeverNaN();
 }
 
 Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 18a948d68e97b..2a6073c20c73b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -859,7 +859,7 @@ class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
     return CurDAG->isKnownNeverNaN(SDValue(N,0));
   }];
   let GISelPredicateCode = [{
-    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
+    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT);
   }];
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index f08502fb3d928..344b580773c9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -261,7 +261,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
   // nodes(max/min) have same behavior when one input is NaN and other isn't.
   // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
   // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
-  if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
+  if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI, VT)) {
     // Don't fold single use constant that can't be inlined.
     if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
         (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
@@ -291,8 +291,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
   // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
   // to 0.0 requires dx10_clamp = true.
   if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
-       isKnownNeverSNaN(Val, MRI)) ||
-      isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
+       isKnownNeverSNaN(Val, MRI, VT)) ||
+      isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT)) {
     Reg = Val;
     return true;
   }
@@ -338,9 +338,9 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
   // no NaN inputs. Most often MI is marked with nnan fast math flag.
   // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
   // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
-  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
+  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT) ||
       (getIEEE() && getDX10Clamp() &&
-       (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
+       (isKnownNeverSNaN(Val, MRI, VT) || isOp3Zero()))) {
     Reg = Val;
     return true;
   }

>From fe2c76ce3527b621a7d6184996311e7645a112a9 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:35:11 +0000
Subject: [PATCH 3/5] fix bug in matchFPMed3ToClamp

---
 llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 344b580773c9d..7279fbe474212 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -329,6 +329,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
   Register Val = Src0->getOperand(0).getReg();
 
   auto isOp3Zero = [&]() {
+    if (MI.getNumOperands() < 5)
+      return false;
     MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
     if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
       return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);

>From db9e8f3be3e3e28ac337f311e97fc8660b2742db Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:35:41 +0000
Subject: [PATCH 4/5] fix fp semantics lookup for vectors

---
 .../CodeGen/GlobalISel/GISelValueTracking.cpp | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index f05a291defff6..f1e77d813f0df 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -1030,7 +1030,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
     //
     if ((Known.KnownFPClasses & fcZero) != fcNone &&
         !Known.isKnownNeverSubnormal()) {
-      DenormalMode Mode = MF->getDenormalMode(getFltSemanticForLLT(DstTy));
+      DenormalMode Mode = MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
       if (Mode != DenormalMode::getIEEE())
         Known.KnownFPClasses |= fcZero;
     }
@@ -1092,8 +1092,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
 
     // If the parent function flushes denormals, the canonical output cannot
     // be a denormal.
-    LLT Ty = MRI.getType(Val);
-    const fltSemantics &FPType = getFltSemanticForLLT(Ty.getScalarType());
+    LLT Ty = MRI.getType(Val).getScalarType();
+    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
     DenormalMode DenormMode = MF->getDenormalMode(FPType);
     if (DenormMode == DenormalMode::getIEEE()) {
       if (KnownSrc.isKnownNever(fcPosZero))
@@ -1203,8 +1203,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
     if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
       Known.knownNot(fcNan);
 
-    LLT Ty = MRI.getType(Val);
-    const fltSemantics &FltSem = getFltSemanticForLLT(Ty.getScalarType());
+    LLT Ty = MRI.getType(Val).getScalarType();
+    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
     DenormalMode Mode = MF->getDenormalMode(FltSem);
 
     if (KnownSrc.isKnownNeverLogicalZero(Mode))
@@ -1323,18 +1323,18 @@ void GISelValueTracking::computeKnownFPClass(Register R,
 
         // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
         if ((KnownLHS.isKnownNeverLogicalNegZero(
-                 MF->getDenormalMode(getFltSemanticForLLT(DstTy))) ||
+                 MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))) ||
              KnownRHS.isKnownNeverLogicalNegZero(
-                 MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) &&
+                 MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
             // Make sure output negative denormal can't flush to -0
             outputDenormalIsIEEEOrPosZero(*MF, DstTy))
           Known.knownNot(fcNegZero);
       } else {
         // Only fsub -0, +0 can return -0
         if ((KnownLHS.isKnownNeverLogicalNegZero(
-                 MF->getDenormalMode(getFltSemanticForLLT(DstTy))) ||
+                 MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))) ||
              KnownRHS.isKnownNeverLogicalPosZero(
-                 MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) &&
+                 MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
             // Make sure output negative denormal can't flush to -0
             outputDenormalIsIEEEOrPosZero(*MF, DstTy))
           Known.knownNot(fcNegZero);
@@ -1381,10 +1381,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
 
     if ((KnownRHS.isKnownNeverInfinity() ||
          KnownLHS.isKnownNeverLogicalZero(
-             MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) &&
+             MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
         (KnownLHS.isKnownNeverInfinity() ||
          KnownRHS.isKnownNeverLogicalZero(
-             MF->getDenormalMode(getFltSemanticForLLT(DstTy)))))
+             MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
       Known.knownNot(fcNan);
 
     break;
@@ -1437,9 +1437,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
           (KnownLHS.isKnownNeverInfinity() ||
            KnownRHS.isKnownNeverInfinity()) &&
           ((KnownLHS.isKnownNeverLogicalZero(
-               MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) ||
+               MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) ||
            (KnownRHS.isKnownNeverLogicalZero(
-               MF->getDenormalMode(getFltSemanticForLLT(DstTy)))))) {
+               MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))) {
         Known.knownNot(fcNan);
       }
 
@@ -1453,7 +1453,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
       if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
           KnownLHS.isKnownNeverInfinity() &&
           KnownRHS.isKnownNeverLogicalZero(
-              MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) {
+              MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) {
         Known.knownNot(fcNan);
       }
 
@@ -1478,10 +1478,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
     // Infinity, nan and zero propagate from source.
     computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1);
 
-    LLT DstTy = MRI.getType(Dst);
-    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy.getScalarType());
-    LLT SrcTy = MRI.getType(Src);
-    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy.getScalarType());
+    LLT DstTy = MRI.getType(Dst).getScalarType();
+    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
+    LLT SrcTy = MRI.getType(Src).getScalarType();
+    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);
 
     // All subnormal inputs should be in the normal range in the result type.
     if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {

>From 0e0b1fb40e1f55e04074a0d49965b1921e78998b Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:35:57 +0000
Subject: [PATCH 5/5] update tests

---
 .../GlobalISel/clamp-fmed3-const-combine.ll   |   25 +-
 .../GlobalISel/clamp-minmax-const-combine.ll  |   40 +-
 .../GlobalISel/fmed3-min-max-const-combine.ll |   28 +-
 .../AMDGPU/GlobalISel/legalize-fmaxnum.mir    |   48 +-
 .../AMDGPU/GlobalISel/legalize-fminnum.mir    |   48 +-
 .../GlobalISel/legalize-vector-args-gfx7.mir  |   20 +-
 .../regbankcombiner-clamp-fmed3-const.mir     |   40 +-
 .../regbankcombiner-clamp-minmax-const.mir    |   18 +-
 .../regbankcombiner-fmed3-minmax-const.mir    |   10 +-
 llvm/test/CodeGen/AMDGPU/fmed3.ll             | 2926 ++++++++++++-----
 llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll        |  216 +-
 llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll        |  635 ++--
 llvm/test/CodeGen/AMDGPU/mad-mix.ll           |  107 +-
 llvm/test/CodeGen/AMDGPU/minmax.ll            |  140 +-
 14 files changed, 3039 insertions(+), 1262 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
index c7676e9da6f49..0ca26b1b7d0df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 1.0, 0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 1.0, 0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -97,7 +100,8 @@ define float @test_fmed3_global_nnan(float %a) #3 {
 ; GFX10-LABEL: test_fmed3_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_fmed3_global_nnan:
@@ -107,7 +111,9 @@ define float @test_fmed3_global_nnan(float %a) #3 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
@@ -134,7 +140,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 1.0, 0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +180,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +191,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index e2e1c1147eeee..70276bd670715 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -51,7 +51,8 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
 ; GFX10-LABEL: test_min_K1max_ValK0_f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    v_mul_f16_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f16 v0, v0, 0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_K1max_ValK0_f16:
@@ -61,7 +62,9 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f16_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f16 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul half %a, 2.0
   %maxnum = call half @llvm.maxnum.f16(half %fmul, half 0.0)
@@ -95,7 +98,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
 ; GFX10-LABEL: test_min_max_splat_padded_with_undef:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT:    v_pk_max_f16 v0, v0, 0
+; GFX10-NEXT:    v_pk_min_f16 v0, v0, 1.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +110,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX12-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, 0
+; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
   %maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
@@ -231,7 +239,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
 ; GFX10-LABEL: test_max_min_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_global_nnan:
@@ -241,7 +251,9 @@ define float @test_max_min_global_nnan(float %a) #3 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_max_num_f32_e64 v0, v0, v0 clamp
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 1.0)
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
@@ -305,9 +317,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -317,7 +327,9 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
@@ -341,7 +353,9 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 0, 1.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
@@ -381,9 +395,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index 75c4cd53e3bfc..97c86b9582784 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -236,12 +236,14 @@ define float @test_min_max_global_nnan(float %a) #2 {
 ; GFX10-LABEL: test_min_max_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_min_max_global_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -252,6 +254,8 @@ define float @test_min_max_global_nnan(float %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
@@ -263,13 +267,17 @@ define float @test_max_min_global_nnan(float %a) #2 {
 ; GFX10-LABEL: test_max_min_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_max_min_global_nnan:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
+; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_global_nnan:
@@ -279,7 +287,9 @@ define float @test_max_min_global_nnan(float %a) #2 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -456,15 +466,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +497,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
index d977049de26f4..eb1f0096c113a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
@@ -291,7 +291,9 @@ body: |
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32)
     ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
     ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
@@ -411,11 +413,15 @@ body: |
     ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
@@ -493,15 +499,21 @@ body: |
     ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32)
     ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
     ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]]
+    ; SI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT4]]
+    ; SI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT5]]
+    ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
     ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32)
     ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
@@ -661,19 +673,27 @@ body: |
     ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32)
     ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
     ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]]
+    ; SI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT4]]
+    ; SI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT5]]
+    ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
     ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32)
     ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
     ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]]
+    ; SI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT6]]
+    ; SI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT7]]
+    ; SI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]]
     ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
@@ -1040,11 +1060,15 @@ body: |
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
-    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
index 32c353d2c579c..4f99e6f8ea6a4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
@@ -291,7 +291,9 @@ body: |
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32)
     ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
     ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
@@ -411,11 +413,15 @@ body: |
     ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
@@ -493,15 +499,21 @@ body: |
     ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32)
     ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
     ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]]
+    ; SI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT4]]
+    ; SI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT5]]
+    ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
     ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32)
     ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
@@ -661,19 +673,27 @@ body: |
     ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32)
     ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
     ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]]
+    ; SI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT4]]
+    ; SI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT5]]
+    ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
     ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32)
     ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
     ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]]
+    ; SI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT6]]
+    ; SI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT7]]
+    ; SI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]]
     ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
@@ -1040,11 +1060,15 @@ body: |
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32)
     ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
-    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir
index 4328d47969a1e..29266b42227e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir
@@ -290,23 +290,33 @@ body: |
     ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
     ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
     ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
-    ; GFX7-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]]
+    ; GFX7-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
+    ; GFX7-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
+    ; GFX7-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
     ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32)
     ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
     ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16)
-    ; GFX7-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]]
+    ; GFX7-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
+    ; GFX7-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]]
+    ; GFX7-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
     ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32)
     ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
     ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16)
-    ; GFX7-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]]
+    ; GFX7-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT4]]
+    ; GFX7-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT5]]
+    ; GFX7-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
     ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32)
     ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
     ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16)
-    ; GFX7-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]]
+    ; GFX7-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT6]]
+    ; GFX7-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT7]]
+    ; GFX7-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]]
     ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32)
     ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
     ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16)
-    ; GFX7-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT8]], [[FPEXT9]]
+    ; GFX7-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT8]]
+    ; GFX7-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT9]]
+    ; GFX7-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]]
     ; GFX7-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE4]](s32)
     ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
     ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
index a97d905f2a978..129cbcfca6fa5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
@@ -162,8 +162,12 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     ;
     ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
     ; GFX12: liveins: $vgpr0
@@ -172,8 +176,12 @@ body: |
     ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
-    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %8:vgpr(s32) = COPY %2(s32)
@@ -222,8 +230,12 @@ body: |
     ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
-    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %8:vgpr(s32) = COPY %2(s32)
@@ -307,8 +319,12 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     ;
     ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
     ; GFX12: liveins: $vgpr0
@@ -317,8 +333,12 @@ body: |
     ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
-    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %8:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
index 70fd67363648d..7e5555b68daad 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
@@ -441,13 +441,8 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %9:vgpr(s32) = COPY %2(s32)
@@ -564,13 +559,8 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %9:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
index 2f41d86100040..f329d126e66db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
@@ -469,11 +469,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %7:vgpr(s32) = COPY %2(s32)
@@ -502,11 +501,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     %7:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index f9a1472b4596f..60aabda10533d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -886,6 +886,7 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
@@ -923,33 +924,60 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_med3_f32 v2, v3, 2.0, 4.0
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v3
+; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -1158,7 +1186,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -1205,20 +1238,25 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -1249,7 +1287,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
 ; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -1284,8 +1327,11 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
 ; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1347,8 +1393,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v3, -1.0, v3
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -1395,20 +1446,25 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -1438,8 +1494,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v2, -v2, -v2
-; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -1473,9 +1534,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v2, -v2, -v2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1537,8 +1601,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v4
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -1597,8 +1666,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v3, -1.0, v3
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -1628,8 +1702,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v3, -v3, -v3
-; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -1663,9 +1742,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v3, -v3, -v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1728,8 +1810,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e64 v3, 1.0, |v3|
 ; SI-GISEL-NEXT:    v_mul_f32_e64 v4, -1.0, |v4|
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, |v3|, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -1789,8 +1875,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e64 v2, 1.0, |v2|
 ; VI-GISEL-NEXT:    v_mul_f32_e64 v3, -1.0, |v3|
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, |v2|, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -1821,8 +1911,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
 ; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GFX9-GISEL-NEXT:    v_max_f32_e64 v2, |v2|, |v2|
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
-; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, |v2|, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -1857,9 +1951,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
 ; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v2, |v2|, |v2|
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, |v2|, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1930,7 +2027,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
 ; SI-GISEL-NEXT:    v_mul_f32_e64 v2, -1.0, |v2|
 ; SI-GISEL-NEXT:    v_mul_f32_e64 v3, -1.0, |v3|
 ; SI-GISEL-NEXT:    v_mul_f32_e64 v4, -1.0, |v4|
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -1992,7 +2092,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
 ; VI-GISEL-NEXT:    v_mul_f32_e64 v4, -1.0, |v7|
 ; VI-GISEL-NEXT:    v_mul_f32_e64 v2, -1.0, |v2|
 ; VI-GISEL-NEXT:    v_mul_f32_e64 v3, -1.0, |v3|
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -2025,7 +2128,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v2, -|v2|, -|v2|
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
-; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -2062,8 +2168,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -|v1|, -|v1|
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v2, -|v2|, -|v2|
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v3, -|v3|, -|v3|
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -2776,7 +2884,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -2823,70 +2937,118 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
-  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
-  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
-  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
-  %a = load volatile float, ptr addrspace(1) %gep0
-  %b = load volatile float, ptr addrspace(1) %gep1
-  %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
-  store float %med3, ptr addrspace(1) %outgep
-  ret void
-}
-
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
+  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
+  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load volatile float, ptr addrspace(1) %gep0
+  %b = load volatile float, ptr addrspace(1) %gep1
+  %c = load volatile float, ptr addrspace(1) %gep2
+  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  store float %med3, ptr addrspace(1) %outgep
+  ret void
+}
+
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
 ; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2928,7 +3090,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -2975,53 +3143,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat1:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat1:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_min_f32 v4, v1, v2 :: v_dual_max_f32 v1, v2, v1
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -3081,7 +3297,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v2, -1.0, v2
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -3128,20 +3349,25 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -3172,7 +3398,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
 ; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -3207,8 +3438,11 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
 ; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_dual_min_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v1, v1, v2, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -3270,7 +3504,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -3317,54 +3557,102 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat2:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat2:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
   %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
@@ -3422,7 +3710,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -3469,53 +3763,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat3:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat3:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_min_f32 v4, v1, v2 :: v_dual_max_f32 v1, v2, v1
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -3574,7 +3916,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -3621,53 +3969,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat4:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat4:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v2, v1 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -3726,10 +4122,16 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
-; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
-; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
-; SI-GISEL-NEXT:    s_endpgm
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
+; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-GISEL-NEXT:    s_endpgm
 ;
 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
 ; VI-SDAG:       ; %bb.0:
@@ -3773,53 +4175,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat5:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat5:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v2, v1 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -3878,7 +4328,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -3925,53 +4381,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat6:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat6:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_min_f32 v4, v2, v1 :: v_dual_max_f32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4030,7 +4534,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -4077,53 +4587,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat7:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat7:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v2, v1 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4182,7 +4740,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -4229,53 +4793,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat8:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat8:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v1, v2, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4334,7 +4946,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %o
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -4381,53 +4999,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %o
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat9:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat9:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v2, v1, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4486,9 +5152,15 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
-; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
-; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
+; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
 ;
 ; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
@@ -4533,53 +5205,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat10:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat10:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v1, v2, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4638,7 +5358,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -4685,53 +5411,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat11:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat11:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v2, v1, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4790,7 +5564,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -4837,53 +5617,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat12:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat12:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v2, v1, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -4942,7 +5770,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -4989,53 +5823,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat13:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat13:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v2, v1, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -5094,7 +5976,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -5141,53 +6029,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat14:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat14:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v1, v2, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -5246,7 +6182,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -5293,53 +6235,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v5
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat15:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat15:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v4
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v2, v1, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v3, v2, v1, v3
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v2, v1, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -5401,7 +6391,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -5448,53 +6444,101 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_med3_f32_pat16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_med3_f32_pat16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_dual_min_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v1, v1, v2, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -6400,7 +7444,10 @@ define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1)
 ; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
 ; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
 ; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -6465,47 +7512,92 @@ define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1)
 ; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
-; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
-; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
+; GFX11-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_add_f32 v3, 4.0, v3 :: v_dual_min_f32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -6575,7 +7667,10 @@ define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1)
 ; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
 ; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
 ; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -6640,47 +7735,92 @@ define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1)
 ; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
-; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
-; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
+; GFX11-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_add_f32 v3, 4.0, v3 :: v_dual_min_f32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -6750,7 +7890,10 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1)
 ; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
 ; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
 ; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
-; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -6815,47 +7958,92 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1)
 ; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
-; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
+; GFX9-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX9-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
-; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
+; GFX9-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
+; GFX11-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_med3_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_add_f32 v3, 4.0, v3 :: v_dual_min_f32 v4, v1, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
@@ -7112,9 +8300,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v5, -1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; SI-GISEL-NEXT:    v_min_f32_e32 v5, v5, v3
 ; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
 ; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
@@ -7178,10 +8369,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, -1.0, v7
-; VI-GISEL-NEXT:    v_max_f32_e32 v5, v7, v2
-; VI-GISEL-NEXT:    v_min_f32_e32 v2, v4, v2
-; VI-GISEL-NEXT:    v_min_f32_e32 v3, v5, v3
-; VI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v5, 1.0, v7
+; VI-GISEL-NEXT:    v_min_f32_e32 v4, v4, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
+; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -7215,10 +8409,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
 ; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_max_f32_e64 v4, -v1, -v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v4, v4, v2
 ; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
-; GFX9-GISEL-NEXT:    v_min_f32_e32 v2, v4, v2
-; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v3
-; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v2, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v4, v1
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -7255,10 +8452,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
 ; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v4, -v1, -v1
-; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v1, v1, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v4, v2
-; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v3, v4
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -7322,8 +8521,11 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out
 ; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
 ; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
+; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -7371,55 +8573,97 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
-; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
-; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
+; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
+; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
+; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_max_f32_e32 v2, v7, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
+; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
+; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_global_nnans_min_max_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
-; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
-; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_global_nnans_min_max_f32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-SDAG-NEXT:    v_min_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_global_nnans_min_max_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_maxmin_f32 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_global_nnans_min_max_f32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[10:11] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[12:13] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v3, v0, s[14:15] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v1
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, v3, v3
+; GFX9-GISEL-NEXT:    v_min_f32_e32 v1, v1, v2
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[8:9]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_global_nnans_min_max_f32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_maxmin_f32 v1, v1, v2, v3
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_global_nnans_min_max_f32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
+; GFX11-GISEL-NEXT:    v_max_f32_e32 v3, v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_maxmin_f32 v1, v1, v2, v3
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index cbd824e171976..850aeb60335e8 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -350,22 +350,22 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %
 ; SDAG-GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; SDAG-GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX9-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX9-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
-; VI-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-VI-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
 ; SDAG-CI:       ; %bb.0:
@@ -378,19 +378,41 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %
 ; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
 ; GISEL-GFX11:       ; %bb.0:
 ; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 ; GISEL-GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX9-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX9-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_med3_f32 v0, v2, 0, 1.0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
 ; GISEL-CI:       ; %bb.0:
 ; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-CI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT:    v_med3_f32 v0, v2, 0, 1.0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v1, v0
 ; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
   %src0.ext = fpext half %src0 to float
@@ -405,27 +427,27 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %
 }
 
 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) #0 {
-; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT:    v_mac_f32_e32 v2, v0, v1
-; VI-NEXT:    v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT:    v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
 ; SDAG-CI:       ; %bb.0:
@@ -435,6 +457,36 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half
 ; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v1, v0 clamp
 ; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_med3_f16 v0, v0, 0, 1.0
+; GISEL-GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX9-NEXT:    v_med3_f16 v0, v0, 0, 1.0
+; GISEL-GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GISEL-VI-NEXT:    v_max_f16_e32 v0, 0, v0
+; GISEL-VI-NEXT:    v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
 ; GISEL-CI:       ; %bb.0:
 ; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -464,36 +516,36 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half
 }
 
 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) #0 {
-; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX11-NEXT:    v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX11-NEXT:    global_store_b16 v[0:1], v3, off dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX11-NEXT:    v_fma_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX11-NEXT:    global_store_b16 v[0:1], v3, off dlc
+; SDAG-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX9-NEXT:    global_store_short v[0:1], v3, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; SDAG-GFX9:       ; %bb.0:
+; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX9-NEXT:    global_store_short v[0:1], v3, off
+; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-GFX9-NEXT:    v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; VI-NEXT:    v_mac_f32_e32 v2, v0, v1
-; VI-NEXT:    v_cvt_f16_f32_e32 v0, v2
-; VI-NEXT:    flat_store_short v[0:1], v0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SDAG-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT:    v_cvt_f16_f32_e32 v0, v2
+; SDAG-VI-NEXT:    flat_store_short v[0:1], v0
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-VI-NEXT:    v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
 ; SDAG-CI:       ; %bb.0:
@@ -507,6 +559,42 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi
 ; SDAG-CI-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_fma_mixlo_f16 v1, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_med3_f16 v0, v1, 0, 1.0
+; GISEL-GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-GFX11-NEXT:    global_store_b16 v[0:1], v1, off dlc
+; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; GISEL-GFX9:       ; %bb.0:
+; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT:    v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX9-NEXT:    global_store_short v[0:1], v0, off
+; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-GFX9-NEXT:    v_med3_f16 v0, v0, 0, 1.0
+; GISEL-GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT:    flat_store_short v[0:1], v0
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-VI-NEXT:    v_max_f16_e32 v0, 0, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GISEL-VI-NEXT:    v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
 ; GISEL-CI:       ; %bb.0:
 ; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 32e0d393a1001..a9d07877b3887 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -271,32 +271,38 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2
 }
 
 define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
-; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-TRUE16-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-GFX1100-TRUE16:       ; %bb.0:
+; SDAG-GFX1100-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-FAKE16-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-GFX1100-FAKE16:       ; %bb.0:
+; SDAG-GFX1100-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; GFX906:       ; %bb.0:
-; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX906-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-GFX900:       ; %bb.0:
+; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT:    v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT:    v_mac_f32_e32 v2, v0, v1
-; VI-NEXT:    v_cvt_f16_f32_e64 v0, v2 clamp
-; VI-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-GFX906:       ; %bb.0:
+; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; SDAG-VI-NEXT:    v_cvt_f16_f32_e64 v0, v2 clamp
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
 ; SDAG-CI:       ; %bb.0:
@@ -306,6 +312,39 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
 ; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, v0 clamp
 ; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GISEL-GFX1100:       ; %bb.0:
+; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_med3_f16 v0, v0, 0, 1.0
+; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GISEL-GFX900:       ; %bb.0:
+; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT:    v_med3_f16 v0, v0, 0, 1.0
+; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GISEL-GFX906:       ; %bb.0:
+; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT:    v_med3_f16 v0, v0, 0, 1.0
+; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v2
+; GISEL-VI-NEXT:    v_max_f16_e32 v0, 0, v0
+; GISEL-VI-NEXT:    v_min_f16_e32 v0, 1.0, v0
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
 ; GISEL-CI:       ; %bb.0:
 ; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -348,28 +387,28 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src
 ; SDAG-GFX1100-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SDAG-GFX1100-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; SDAG-GFX900:       ; %bb.0:
+; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
-; GFX906:       ; %bb.0:
-; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX906-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; SDAG-GFX906:       ; %bb.0:
+; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
+; SDAG-GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
-; VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; VI-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SDAG-VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
 ; SDAG-CI:       ; %bb.0:
@@ -382,17 +421,45 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src
 ; GISEL-GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
 ; GISEL-GFX1100:       ; %bb.0:
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
-; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 ; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; GISEL-GFX900:       ; %bb.0:
+; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX900-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; GISEL-GFX906:       ; %bb.0:
+; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
+; GISEL-GFX906-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_med3_f32 v0, v2, 0, 1.0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
 ; GISEL-CI:       ; %bb.0:
 ; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-CI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT:    v_med3_f32 v0, v2, 0, 1.0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
   %src0.ext = fpext half %src0 to float
@@ -914,30 +981,39 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
 ; FIXME (DAG): Fold clamp
 
 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    v_mov_b32_e32 v0, v3
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; SDAG-GFX1100-TRUE16:       ; %bb.0:
+; SDAG-GFX1100-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b32_e32 v0, v3
+; SDAG-GFX1100-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    v_mov_b32_e32 v0, v3
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; SDAG-GFX1100-FAKE16:       ; %bb.0:
+; SDAG-GFX1100-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    v_mov_b32_e32 v0, v3
+; SDAG-GFX1100-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt:
-; GFX906:       ; %bb.0:
-; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    v_mov_b32_e32 v0, v3
-; GFX906-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; SDAG-GFX900:       ; %bb.0:
+; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    v_mov_b32_e32 v0, v3
+; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; SDAG-GFX906:       ; %bb.0:
+; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    v_mov_b32_e32 v0, v3
+; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
 ; SDAG-VI:       ; %bb.0:
@@ -978,6 +1054,35 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s
 ; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v1, v1 clamp
 ; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GISEL-GFX1100:       ; %bb.0:
+; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_pk_max_f16 v0, v3, 0
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GISEL-GFX900:       ; %bb.0:
+; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_pk_max_f16 v0, v3, 0
+; GISEL-GFX900-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt:
+; GISEL-GFX906:       ; %bb.0:
+; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_pk_max_f16 v0, v3, 0
+; GISEL-GFX906-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
 ; GISEL-VI:       ; %bb.0:
 ; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -989,8 +1094,13 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v5, v3, v4
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e64 v0, v5 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v1, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v5
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, 0x3c00
+; GISEL-VI-NEXT:    v_max_f16_e32 v0, 0, v0
+; GISEL-VI-NEXT:    v_max_f16_e32 v1, 0, v1
+; GISEL-VI-NEXT:    v_min_f16_e32 v0, 1.0, v0
+; GISEL-VI-NEXT:    v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1147,33 +1257,36 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
 ; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; GISEL-GFX1100:       ; %bb.0:
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v0, v6
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_pk_max_f16 v0, v6, 0
+; GISEL-GFX1100-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
 ; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; GISEL-GFX900:       ; %bb.0:
 ; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT:    v_pk_max_f16 v0, v6, 0
 ; GISEL-GFX900-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-GFX900-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
 ; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; GISEL-GFX906:       ; %bb.0:
 ; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT:    v_pk_max_f16 v0, v6, 0
 ; GISEL-GFX906-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v3
+; GISEL-GFX906-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
 ; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
@@ -1190,11 +1303,18 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v5, v5
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v8, v6, v7
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v4, v0, v2
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e64 v0, v8 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v2, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v8
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v5, v1, v3
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e64 v1, v5 clamp
-; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v2
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v1, v4
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v2, v5
+; GISEL-VI-NEXT:    v_max_f16_e32 v0, 0, v0
+; GISEL-VI-NEXT:    v_mov_b32_e32 v3, 0x3c00
+; GISEL-VI-NEXT:    v_max_f16_e32 v1, 0, v1
+; GISEL-VI-NEXT:    v_max_f16_e32 v2, 0, v2
+; GISEL-VI-NEXT:    v_min_f16_e32 v0, 1.0, v0
+; GISEL-VI-NEXT:    v_min_f16_sdwa v3, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GISEL-VI-NEXT:    v_min_f16_e32 v1, 1.0, v2
+; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v3
 ; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
@@ -1247,39 +1367,51 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
 }
 
 define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
-; GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-GFX1100-TRUE16:       ; %bb.0:
+; SDAG-GFX1100-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
+; SDAG-GFX1100-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    v_mov_b32_e32 v0, v6
-; GFX900-NEXT:    v_mov_b32_e32 v1, v2
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-GFX1100-FAKE16:       ; %bb.0:
+; SDAG-GFX1100-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-FAKE16-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
+; SDAG-GFX1100-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
-; GFX906:       ; %bb.0:
-; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    v_mov_b32_e32 v0, v6
-; GFX906-NEXT:    v_mov_b32_e32 v1, v2
-; GFX906-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-GFX900:       ; %bb.0:
+; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT:    v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    v_mov_b32_e32 v0, v6
+; SDAG-GFX900-NEXT:    v_mov_b32_e32 v1, v2
+; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; SDAG-GFX906:       ; %bb.0:
+; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    v_mov_b32_e32 v0, v6
+; SDAG-GFX906-NEXT:    v_mov_b32_e32 v1, v2
+; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
 ; SDAG-VI:       ; %bb.0:
@@ -1358,6 +1490,48 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
 ; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v3, v3 clamp
 ; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-GFX1100:       ; %bb.0:
+; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX1100-NEXT:    v_pk_max_f16 v0, v6, 0
+; GISEL-GFX1100-NEXT:    v_pk_max_f16 v1, v7, 0
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX1100-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX1100-NEXT:    v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-GFX900:       ; %bb.0:
+; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_pk_max_f16 v0, v6, 0
+; GISEL-GFX900-NEXT:    v_pk_max_f16 v1, v7, 0
+; GISEL-GFX900-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX900-NEXT:    v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt:
+; GISEL-GFX906:       ; %bb.0:
+; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_pk_max_f16 v0, v6, 0
+; GISEL-GFX906-NEXT:    v_pk_max_f16 v1, v7, 0
+; GISEL-GFX906-NEXT:    v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX906-NEXT:    v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
 ; GISEL-VI:       ; %bb.0:
 ; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1377,10 +1551,19 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v4, v0, v2
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v11, v7, v9
 ; GISEL-VI-NEXT:    v_mac_f32_e32 v5, v1, v3
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e64 v0, v10 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v1, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e64 v2, v11 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v3, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v10
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v1, v4
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v2, v11
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v3, v5
+; GISEL-VI-NEXT:    v_max_f16_e32 v0, 0, v0
+; GISEL-VI-NEXT:    v_max_f16_e32 v1, 0, v1
+; GISEL-VI-NEXT:    v_max_f16_e32 v2, 0, v2
+; GISEL-VI-NEXT:    v_max_f16_e32 v3, 0, v3
+; GISEL-VI-NEXT:    v_mov_b32_e32 v4, 0x3c00
+; GISEL-VI-NEXT:    v_min_f16_e32 v0, 1.0, v0
+; GISEL-VI-NEXT:    v_min_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GISEL-VI-NEXT:    v_min_f16_e32 v2, 1.0, v2
+; GISEL-VI-NEXT:    v_min_f16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-VI-NEXT:    v_or_b32_e32 v1, v2, v3
 ; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
@@ -1528,7 +1711,7 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
 ; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v4, v3
-; GISEL-GFX1100-NEXT:    v_max_f16_e64 v3, v3, v3 clamp
+; GISEL-GFX1100-NEXT:    v_med3_f16 v3, v3, 0, 1.0
 ; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GISEL-GFX1100-NEXT:    v_and_b32_e32 v0, 0xffff, v3
@@ -1539,20 +1722,22 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
 ; GISEL-GFX900:       ; %bb.0:
 ; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT:    v_max_f16_e64 v4, v3, v3 clamp
+; GISEL-GFX900-NEXT:    v_med3_f16 v4, v3, 0, 1.0
 ; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, 0xffff0000
-; GISEL-GFX900-NEXT:    v_and_or_b32 v0, v3, v0, v4
+; GISEL-GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v4
+; GISEL-GFX900-NEXT:    v_mov_b32_e32 v1, 0xffff0000
+; GISEL-GFX900-NEXT:    v_and_or_b32 v0, v3, v1, v0
 ; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
 ; GISEL-GFX906:       ; %bb.0:
 ; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT:    v_max_f16_e64 v4, v3, v3 clamp
+; GISEL-GFX906-NEXT:    v_med3_f16 v4, v3, 0, 1.0
 ; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, 0xffff0000
-; GISEL-GFX906-NEXT:    v_and_or_b32 v0, v3, v0, v4
+; GISEL-GFX906-NEXT:    v_and_b32_e32 v0, 0xffff, v4
+; GISEL-GFX906-NEXT:    v_mov_b32_e32 v1, 0xffff0000
+; GISEL-GFX906-NEXT:    v_and_or_b32 v0, v3, v1, v0
 ; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
@@ -1690,13 +1875,15 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
 ; GISEL-GFX1100:       ; %bb.0:
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    v_fma_mixlo_f16 v4, v0, v1, v2 op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GISEL-GFX1100-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GISEL-GFX1100-NEXT:    v_med3_f16 v3, v3, 0, 1.0
 ; GISEL-GFX1100-NEXT:    v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GISEL-GFX1100-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX1100-NEXT:    v_and_or_b32 v0, 0xffff, v4, v0
 ; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1704,9 +1891,10 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
 ; GISEL-GFX900:       ; %bb.0:
 ; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v4
+; GISEL-GFX900-NEXT:    v_med3_f16 v0, v4, 0, 1.0
+; GISEL-GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GISEL-GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GISEL-GFX900-NEXT:    v_mov_b32_e32 v1, 0xffff
 ; GISEL-GFX900-NEXT:    v_and_or_b32 v0, v3, v1, v0
@@ -1716,9 +1904,10 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
 ; GISEL-GFX906:       ; %bb.0:
 ; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT:    v_and_b32_e32 v0, 0xffff, v4
+; GISEL-GFX906-NEXT:    v_med3_f16 v0, v4, 0, 1.0
+; GISEL-GFX906-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GISEL-GFX906-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GISEL-GFX906-NEXT:    v_mov_b32_e32 v1, 0xffff
 ; GISEL-GFX906-NEXT:    v_and_or_b32 v0, v3, v1, v0
@@ -1872,10 +2061,13 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
 ; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt:
 ; GISEL-GFX1100:       ; %bb.0:
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v1, v3
+; GISEL-GFX1100-NEXT:    v_med3_f32 v1, v3, 0, 1.0
+; GISEL-GFX1100-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX1100-NEXT:    v_pack_b32_f16 v0, v1, v0
@@ -1884,9 +2076,11 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
 ; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt:
 ; GISEL-GFX900:       ; %bb.0:
 ; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v1, v3
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_med3_f32 v1, v3, 0, 1.0
+; GISEL-GFX900-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX900-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -1894,9 +2088,11 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
 ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt:
 ; GISEL-GFX906:       ; %bb.0:
 ; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v1, v3
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_med3_f32 v1, v3, 0, 1.0
+; GISEL-GFX906-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX906-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
@@ -1910,11 +2106,13 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v5, v2
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT:    v_mad_f32 v3, v3, v4, v5 clamp
-; GISEL-VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v1, v3
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GISEL-VI-NEXT:    v_or_b32_e32 v0, v1, v0
+; GISEL-VI-NEXT:    v_mac_f32_e32 v5, v3, v4
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_med3_f32 v0, v5, 0, 1.0
+; GISEL-VI-NEXT:    v_med3_f32 v1, v2, 0, 1.0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
@@ -1926,8 +2124,10 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v2, v4 clamp
-; GISEL-CI-NEXT:    v_mad_f32 v1, v1, v3, v5 clamp
+; GISEL-CI-NEXT:    v_mac_f32_e32 v4, v0, v2
+; GISEL-CI-NEXT:    v_mac_f32_e32 v5, v1, v3
+; GISEL-CI-NEXT:    v_med3_f32 v0, v4, 0, 1.0
+; GISEL-CI-NEXT:    v_med3_f32 v1, v5, 0, 1.0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
@@ -2052,25 +2252,33 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
 ; GISEL-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt:
 ; GISEL-GFX1100:       ; %bb.0:
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GISEL-GFX1100-NEXT:    v_med3_f32 v2, v6, 0, 1.0
+; GISEL-GFX1100-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GISEL-GFX1100-NEXT:    v_med3_f32 v1, v1, 0, 1.0
+; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v2, v6
 ; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GISEL-GFX1100-NEXT:    v_pack_b32_f16 v0, v2, v0
 ; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt:
 ; GISEL-GFX900:       ; %bb.0:
 ; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v2, v6
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_med3_f32 v2, v6, 0, 1.0
+; GISEL-GFX900-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX900-NEXT:    v_med3_f32 v1, v1, 0, 1.0
 ; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-GFX900-NEXT:    v_pack_b32_f16 v0, v2, v0
 ; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
@@ -2078,11 +2286,14 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
 ; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt:
 ; GISEL-GFX906:       ; %bb.0:
 ; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v2, v6
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_med3_f32 v2, v6, 0, 1.0
+; GISEL-GFX906-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
+; GISEL-GFX906-NEXT:    v_med3_f32 v1, v1, 0, 1.0
 ; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-GFX906-NEXT:    v_pack_b32_f16 v0, v2, v0
 ; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
@@ -2099,13 +2310,16 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; GISEL-VI-NEXT:    v_mad_f32 v6, v6, v7, v8 clamp
-; GISEL-VI-NEXT:    v_mad_f32 v0, v0, v2, v4 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v2, v6
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GISEL-VI-NEXT:    v_mad_f32 v1, v1, v3, v5 clamp
+; GISEL-VI-NEXT:    v_mac_f32_e32 v8, v6, v7
+; GISEL-VI-NEXT:    v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT:    v_med3_f32 v0, v8, 0, 1.0
+; GISEL-VI-NEXT:    v_mac_f32_e32 v5, v1, v3
+; GISEL-VI-NEXT:    v_med3_f32 v1, v4, 0, 1.0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    v_med3_f32 v1, v5, 0, 1.0
 ; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GISEL-VI-NEXT:    v_or_b32_e32 v0, v2, v0
+; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
@@ -2120,9 +2334,12 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v6, v6
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v7, v7
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v3, v6 clamp
-; GISEL-CI-NEXT:    v_mad_f32 v1, v1, v4, v7 clamp
-; GISEL-CI-NEXT:    v_mad_f32 v2, v2, v5, v8 clamp
+; GISEL-CI-NEXT:    v_mac_f32_e32 v6, v0, v3
+; GISEL-CI-NEXT:    v_mac_f32_e32 v7, v1, v4
+; GISEL-CI-NEXT:    v_mac_f32_e32 v8, v2, v5
+; GISEL-CI-NEXT:    v_med3_f32 v0, v6, 0, 1.0
+; GISEL-CI-NEXT:    v_med3_f32 v1, v7, 0, 1.0
+; GISEL-CI-NEXT:    v_med3_f32 v2, v8, 0, 1.0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
@@ -2275,12 +2492,18 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
 ; GISEL-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt:
 ; GISEL-GFX1100:       ; %bb.0:
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GISEL-GFX1100-NEXT:    v_med3_f32 v3, v6, 0, 1.0
+; GISEL-GFX1100-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GISEL-GFX1100-NEXT:    v_med3_f32 v2, v2, 0, 1.0
+; GISEL-GFX1100-NEXT:    v_med3_f32 v1, v1, 0, 1.0
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v3, v6
+; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v2, v2
@@ -2293,11 +2516,15 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
 ; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt:
 ; GISEL-GFX900:       ; %bb.0:
 ; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v3, v6
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_med3_f32 v3, v6, 0, 1.0
+; GISEL-GFX900-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX900-NEXT:    v_med3_f32 v2, v2, 0, 1.0
+; GISEL-GFX900-NEXT:    v_med3_f32 v1, v1, 0, 1.0
+; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -2308,11 +2535,15 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
 ; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt:
 ; GISEL-GFX906:       ; %bb.0:
 ; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v3, v6
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_med3_f32 v3, v6, 0, 1.0
+; GISEL-GFX906-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX906-NEXT:    v_med3_f32 v2, v2, 0, 1.0
+; GISEL-GFX906-NEXT:    v_med3_f32 v1, v1, 0, 1.0
+; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -2335,16 +2566,20 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v11, v5
 ; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT:    v_mad_f32 v6, v6, v8, v10 clamp
-; GISEL-VI-NEXT:    v_mad_f32 v0, v0, v2, v4 clamp
-; GISEL-VI-NEXT:    v_mad_f32 v2, v7, v9, v11 clamp
-; GISEL-VI-NEXT:    v_mad_f32 v1, v1, v3, v5 clamp
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v3, v6
-; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GISEL-VI-NEXT:    v_mac_f32_e32 v10, v6, v8
+; GISEL-VI-NEXT:    v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT:    v_mac_f32_e32 v11, v7, v9
+; GISEL-VI-NEXT:    v_mac_f32_e32 v5, v1, v3
+; GISEL-VI-NEXT:    v_med3_f32 v0, v10, 0, 1.0
+; GISEL-VI-NEXT:    v_med3_f32 v1, v4, 0, 1.0
+; GISEL-VI-NEXT:    v_med3_f32 v2, v11, 0, 1.0
+; GISEL-VI-NEXT:    v_med3_f32 v3, v5, 0, 1.0
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GISEL-VI-NEXT:    v_or_b32_e32 v0, v3, v0
-; GISEL-VI-NEXT:    v_or_b32_e32 v1, v2, v1
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GISEL-VI-NEXT:    v_cvt_f16_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-VI-NEXT:    v_or_b32_e32 v1, v2, v3
 ; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
@@ -2362,10 +2597,14 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v10, v10
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v4, v8 clamp
-; GISEL-CI-NEXT:    v_mad_f32 v1, v1, v5, v9 clamp
-; GISEL-CI-NEXT:    v_mad_f32 v2, v2, v6, v10 clamp
-; GISEL-CI-NEXT:    v_mad_f32 v3, v3, v7, v11 clamp
+; GISEL-CI-NEXT:    v_mac_f32_e32 v8, v0, v4
+; GISEL-CI-NEXT:    v_mac_f32_e32 v9, v1, v5
+; GISEL-CI-NEXT:    v_mac_f32_e32 v10, v2, v6
+; GISEL-CI-NEXT:    v_mac_f32_e32 v11, v3, v7
+; GISEL-CI-NEXT:    v_med3_f32 v0, v8, 0, 1.0
+; GISEL-CI-NEXT:    v_med3_f32 v1, v9, 0, 1.0
+; GISEL-CI-NEXT:    v_med3_f32 v2, v10, 0, 1.0
+; GISEL-CI-NEXT:    v_med3_f32 v3, v11, 0, 1.0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index e2170fa406da4..53db04e21af6e 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -1599,41 +1599,41 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
 }
 
 define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
-; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-GFX1100:       ; %bb.0:
+; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-GFX900:       ; %bb.0:
+; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
-; GFX906:       ; %bb.0:
-; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GFX906-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-GFX906:       ; %bb.0:
+; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
-; GFX9GEN:       ; %bb.0:
-; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
-; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-GFX9GEN:       ; %bb.0:
+; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
-; VI-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; SDAG-VI:       ; %bb.0:
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
 ; SDAG-CI:       ; %bb.0:
@@ -1641,13 +1641,56 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h
 ; SDAG-CI-NEXT:    v_mad_f32 v0, v1, v3, v5 clamp
 ; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GISEL-GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-GFX1100:       ; %bb.0:
+; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-GFX900:       ; %bb.0:
+; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX900-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-GFX906:       ; %bb.0:
+; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; GISEL-GFX906-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-GFX9GEN:       ; %bb.0:
+; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-GFX9GEN-NEXT:    v_med3_f32 v0, v2, 0, 1.0
+; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GISEL-VI:       ; %bb.0:
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-VI-NEXT:    v_med3_f32 v0, v2, 0, 1.0
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
 ; GISEL-CI:       ; %bb.0:
 ; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v1
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v3
 ; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v5
-; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
+; GISEL-CI-NEXT:    v_mac_f32_e32 v2, v0, v1
+; GISEL-CI-NEXT:    v_med3_f32 v0, v2, 0, 1.0
 ; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
   %src0.hi = extractelement <2 x half> %src0, i32 1
   %src1.hi = extractelement <2 x half> %src1, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index bdd8935d0df5e..2958ca7122cb7 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -445,23 +445,47 @@ define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b,
 }
 
 define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
-; GFX11-LABEL: test_med3_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: test_med3_f32:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SDAG-GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX12-LABEL: test_med3_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX11-LABEL: test_med3_f32:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GISEL-GFX11-NEXT:    v_min_f32_e32 v5, v2, v3
+; GISEL-GFX11-NEXT:    v_dual_max_f32 v2, v2, v3 :: v_dual_max_f32 v3, v4, v4
+; GISEL-GFX11-NEXT:    v_minmax_f32 v2, v2, v3, v5
+; GISEL-GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-LABEL: test_med3_f32:
+; SDAG-GFX12:       ; %bb.0:
+; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
+; SDAG-GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-LABEL: test_med3_f32:
+; GISEL-GFX12:       ; %bb.0:
+; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GISEL-GFX12-NEXT:    v_min_num_f32_e32 v5, v2, v3
+; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v3 :: v_dual_max_num_f32 v3, v4, v4
+; GISEL-GFX12-NEXT:    v_minmax_num_f32 v2, v2, v3, v5
+; GISEL-GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
   %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
   %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
@@ -471,23 +495,47 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z)
 }
 
 define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
-; GFX11-LABEL: test_med3_minimumnum_maximumnum_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: test_med3_minimumnum_maximumnum_f32:
+; SDAG-GFX11:       ; %bb.0:
+; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
+; SDAG-GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX12-LABEL: test_med3_minimumnum_maximumnum_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX11-LABEL: test_med3_minimumnum_maximumnum_f32:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
+; GISEL-GFX11-NEXT:    v_min_f32_e32 v5, v2, v3
+; GISEL-GFX11-NEXT:    v_dual_max_f32 v2, v2, v3 :: v_dual_max_f32 v3, v4, v4
+; GISEL-GFX11-NEXT:    v_minmax_f32 v2, v2, v3, v5
+; GISEL-GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-LABEL: test_med3_minimumnum_maximumnum_f32:
+; SDAG-GFX12:       ; %bb.0:
+; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
+; SDAG-GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-LABEL: test_med3_minimumnum_maximumnum_f32:
+; GISEL-GFX12:       ; %bb.0:
+; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GISEL-GFX12-NEXT:    v_min_num_f32_e32 v5, v2, v3
+; GISEL-GFX12-NEXT:    v_dual_max_num_f32 v2, v2, v3 :: v_dual_max_num_f32 v3, v4, v4
+; GISEL-GFX12-NEXT:    v_minmax_num_f32 v2, v2, v3, v5
+; GISEL-GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call float @llvm.minimumnum.f32(float %x, float %y)
   %tmp1 = call float @llvm.maximumnum.f32(float %x, float %y)
   %tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z)
@@ -875,14 +923,24 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
 ; GISEL-GFX11-TRUE16-LABEL: test_med3_f16:
 ; GISEL-GFX11-TRUE16:       ; %bb.0:
 ; GISEL-GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-TRUE16-NEXT:    v_med3_f16 v2.l, v2.l, v3.l, v4.l
+; GISEL-GFX11-TRUE16-NEXT:    v_max_f16_e32 v2.l, v2.l, v2.l
+; GISEL-GFX11-TRUE16-NEXT:    v_max_f16_e32 v2.h, v3.l, v3.l
+; GISEL-GFX11-TRUE16-NEXT:    v_min_f16_e32 v3.l, v2.l, v2.h
+; GISEL-GFX11-TRUE16-NEXT:    v_max_f16_e32 v2.l, v2.l, v2.h
+; GISEL-GFX11-TRUE16-NEXT:    v_max_f16_e32 v2.h, v4.l, v4.l
+; GISEL-GFX11-TRUE16-NEXT:    v_minmax_f16 v2.l, v2.l, v2.h, v3.l
 ; GISEL-GFX11-TRUE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GISEL-GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX11-FAKE16-LABEL: test_med3_f16:
 ; GISEL-GFX11-FAKE16:       ; %bb.0:
 ; GISEL-GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-FAKE16-NEXT:    v_med3_f16 v2, v2, v3, v4
+; GISEL-GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v2, v2
+; GISEL-GFX11-FAKE16-NEXT:    v_max_f16_e32 v3, v3, v3
+; GISEL-GFX11-FAKE16-NEXT:    v_min_f16_e32 v5, v2, v3
+; GISEL-GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v2, v3
+; GISEL-GFX11-FAKE16-NEXT:    v_max_f16_e32 v3, v4, v4
+; GISEL-GFX11-FAKE16-NEXT:    v_minmax_f16 v2, v2, v3, v5
 ; GISEL-GFX11-FAKE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GISEL-GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -915,7 +973,12 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
 ; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
 ; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
 ; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-TRUE16-NEXT:    v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
+; GISEL-GFX12-TRUE16-NEXT:    v_max_num_f16_e32 v2.l, v2.l, v2.l
+; GISEL-GFX12-TRUE16-NEXT:    v_max_num_f16_e32 v2.h, v3.l, v3.l
+; GISEL-GFX12-TRUE16-NEXT:    v_min_num_f16_e32 v3.l, v2.l, v2.h
+; GISEL-GFX12-TRUE16-NEXT:    v_max_num_f16_e32 v2.l, v2.l, v2.h
+; GISEL-GFX12-TRUE16-NEXT:    v_max_num_f16_e32 v2.h, v4.l, v4.l
+; GISEL-GFX12-TRUE16-NEXT:    v_minmax_num_f16 v2.l, v2.l, v2.h, v3.l
 ; GISEL-GFX12-TRUE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -926,7 +989,12 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
 ; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
 ; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
 ; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-FAKE16-NEXT:    v_med3_num_f16 v2, v2, v3, v4
+; GISEL-GFX12-FAKE16-NEXT:    v_max_num_f16_e32 v2, v2, v2
+; GISEL-GFX12-FAKE16-NEXT:    v_max_num_f16_e32 v3, v3, v3
+; GISEL-GFX12-FAKE16-NEXT:    v_min_num_f16_e32 v5, v2, v3
+; GISEL-GFX12-FAKE16-NEXT:    v_max_num_f16_e32 v2, v2, v3
+; GISEL-GFX12-FAKE16-NEXT:    v_max_num_f16_e32 v3, v4, v4
+; GISEL-GFX12-FAKE16-NEXT:    v_minmax_num_f16 v2, v2, v3, v5
 ; GISEL-GFX12-FAKE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %tmp0 = call half @llvm.minnum.f16(half %x, half %y)



More information about the llvm-commits mailing list