[llvm] 0b34ffe - AMDGPU/GlobalISel: Add clamp combine

Fri Dec 3 04:02:49 PST 2021

Author: Petar Avramovic
Date: 2021-12-03T12:49:39+01:00
New Revision: 0b34ffe4a61eab708b594f23a7781c90ec22423d

URL: https://github.com/llvm/llvm-project/commit/0b34ffe4a61eab708b594f23a7781c90ec22423d
DIFF: https://github.com/llvm/llvm-project/commit/0b34ffe4a61eab708b594f23a7781c90ec22423d.diff

LOG: AMDGPU/GlobalISel: Add clamp combine

Add clamp combine. Source is fminnum(fmaxnum(Val, 0.0), 1.0) or
fmaxnum(fminnum(Val, 1.0), 0.0) or fmed3 intrinsic with 0.0 and
1.0 as two out of three operands.

Differential Revision: https://reviews.llvm.org/D90052

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCombine.td
    llvm/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 482474692788..2415fdfecaae 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -73,6 +73,21 @@ def fp_minmax_to_med3 : GICombineRule<
          [{ return RegBankHelper.matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
   (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
 
+def fp_minmax_to_clamp : GICombineRule<
+  (defs root:$min_or_max, register_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMAXNUM,
+                           G_FMINNUM,
+                           G_FMAXNUM_IEEE,
+                           G_FMINNUM_IEEE):$min_or_max,
+         [{ return RegBankHelper.matchFPMinMaxToClamp(*${min_or_max}, ${matchinfo}); }]),
+  (apply [{ RegBankHelper.applyClamp(*${min_or_max}, ${matchinfo}); }])>;
+
+def fmed3_intrinsic_to_clamp : GICombineRule<
+  (defs root:$fmed3, register_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_INTRINSIC):$fmed3,
+         [{ return RegBankHelper.matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]),
+  (apply [{ RegBankHelper.applyClamp(*${fmed3}, ${matchinfo}); }])>;
+
 def remove_fcanonicalize_matchinfo : GIDefMatchData<"Register">;
 
 def remove_fcanonicalize : GICombineRule<
@@ -113,7 +128,7 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
 def AMDGPURegBankCombinerHelper : GICombinerHelper<
   "AMDGPUGenRegBankCombinerHelper",
   [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
-   fp_minmax_to_med3]> {
+   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
   let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
   let StateClass = "AMDGPURegBankCombinerHelperState";
   let AdditionalArguments = [];

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 84184b8ab2a3..7fd94a977be7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -173,6 +173,7 @@ def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
 def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
 def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
 def : GINodeEquiv<G_AMDGPU_FMED3, AMDGPUfmed3_impl>;
+def : GINodeEquiv<G_AMDGPU_CLAMP, AMDGPUclamp>;
 
 def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
 def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 4f3d7fd0a9dc..3ce67a733c10 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/Target/TargetMachine.h"
 #define DEBUG_TYPE "amdgpu-regbank-combiner"
 
@@ -67,12 +68,18 @@ class AMDGPURegBankCombinerHelper {
 
   bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
   bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+  bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
+  bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
   void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+  void applyClamp(MachineInstr &MI, Register &Reg);
 
 private:
   AMDGPU::SIModeRegisterDefaults getMode();
   bool getIEEE();
+  bool getDX10Clamp();
   bool isFminnumIeee(const MachineInstr &MI);
+  bool isFCst(MachineInstr *MI);
+  bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
 };
 
 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
@@ -167,19 +174,20 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
 // ieee = true  : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
 // ieee = false : min/max(NaN, K) = K
+// clamp(NaN) = dx10_clamp ? 0.0 : NaN
 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
 // Other operand commutes (see matchMed) give same result since min and max are
 // commutative.
 
 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), KO<=K1
-// with fmed3(Val, K0, K1).
+// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
 // Val = SNaN only for ieee = true
 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
 // Val = NaN,ieee = false or Val = QNaN,ieee = true
 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
-// min(max(NaN, K0), K1) = min(K0, K1) = K0
+// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
 bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
     MachineInstr &MI, Med3MatchInfo &MatchInfo) {
@@ -217,6 +225,92 @@ bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
 
   return false;
 }
+
+bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
+                                                       Register &Reg) {
+  // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
+  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
+  Register Val;
+  Optional<FPValueAndVReg> K0, K1;
+  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
+  if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+    return false;
+
+  if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
+    return false;
+
+  // For IEEE=false perform combine only when it's safe to assume that there are
+  // no NaN inputs. Most often MI is marked with nnan fast math flag.
+  // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
+  // to 0.0, and folding it to clamp requires dx10_clamp = true.
+  if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
+       isKnownNeverSNaN(Val, MRI)) ||
+      isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
+    Reg = Val;
+    return true;
+  }
+
+  return false;
+}
+
+// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
+// Val = SNaN only for ieee = true. It is important which operand is NaN.
+// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
+// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
+// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
+// Val = NaN,ieee = false or Val = QNaN,ieee = true
+// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
+// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
+// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
+bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
+                                                     Register &Reg) {
+  if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
+    return false;
+
+  // In llvm-ir, clamp is often represented as an intrinsic call to
+  // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
+  MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+  MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
+  MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
+
+  if (isFCst(Src0) && !isFCst(Src1))
+    std::swap(Src0, Src1);
+  if (isFCst(Src1) && !isFCst(Src2))
+    std::swap(Src1, Src2);
+  if (isFCst(Src0) && !isFCst(Src1))
+    std::swap(Src0, Src1);
+  if (!isClampZeroToOne(Src1, Src2))
+    return false;
+
+  Register Val = Src0->getOperand(0).getReg();
+
+  auto isOp3Zero = [&]() {
+    MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
+    if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
+      return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
+    return false;
+  };
+  // For IEEE=false perform combine only when it's safe to assume that there are
+  // no NaN inputs. Most often MI is marked with nnan fast math flag.
+  // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
+  // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
+  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
+      (getIEEE() && getDX10Clamp() &&
+       (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
+    Reg = Val;
+    return true;
+  }
+
+  return false;
+}
+
+void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) {
+  B.setInstrAndDebugLoc(MI);
+  B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
+               MI.getFlags());
+  MI.eraseFromParent();
+}
+
 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                             Med3MatchInfo &MatchInfo) {
   B.setInstrAndDebugLoc(MI);
@@ -233,10 +327,27 @@ AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
 
 bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
 
+bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
+
 bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
   return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
 }
 
+bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
+  return MI->getOpcode() == AMDGPU::G_FCONSTANT;
+}
+
+bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
+                                                   MachineInstr *K1) {
+  if (isFCst(K0) && isFCst(K1)) {
+    const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
+    const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
+    return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
+           (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
+  }
+  return false;
+}
+
 class AMDGPURegBankCombinerHelperState {
 protected:
   CombinerHelper &Helper;

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 94e38dd93dcb..7981ccb3f2ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2872,6 +2872,12 @@ def G_AMDGPU_FMED3 : AMDGPUGenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_AMDGPU_CLAMP : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
 // Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
 // operand Expects a MachineMemOperand in addition to explicit
 // operands.

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
new file mode 100644
index 000000000000..4c4d6a4ea986
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
+; GFX10-LABEL: test_fmed3_f32_known_nnan_ieee_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %fmed = call nnan float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
+  ret float %fmed
+}
+
+define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
+; GFX10-LABEL: test_fmed3_f16_known_nnan_ieee_false:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul half %a, 2.0
+  %fmed = call nnan half @llvm.amdgcn.fmed3.f16(half %fmul, half 0.0, half 1.0)
+  ret half %fmed
+}
+
+; %fmin is known non-SNaN because fmin inputs are fcanonicalized
+define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
+; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e64 v0, 0x41200000, v0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmin = call float @llvm.minnum.f32(float %a, float 10.0)
+  %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmin, float 0.0, float 1.0)
+  ret float %fmed
+}
+
+; input may be SNaN. It's safe to clamp since third operand in fmed3 is 0.0
+define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true(float %a) #2 {
+; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
+  ret float %fmed
+}
+
+; global nnan function attribute always forces clamp combine
+
+define float @test_fmed3_global_nnan(float %a) #3 {
+; GFX10-LABEL: test_fmed3_global_nnan:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
+  ret float %fmed
+}
+
+; ------------------------------------------------------------------------------
+; Negative patterns
+; ------------------------------------------------------------------------------
+
+; ieee=false requires known never NaN input
+define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
+; GFX10-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 1.0, 0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
+  ret float %fmed
+}
+
+; ieee=true input is known non-SNaN but dx10_clamp=false
+define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
+; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 0x41200000, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmin = call float @llvm.minnum.f32(float %a, float 10.0)
+  %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmin, float 0.0, float 1.0)
+  ret float %fmed
+}
+
+; ieee=true dx10_clamp=true but input may be SNaN, clamp requires third operand in fmed3 to be 0.0
+define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
+; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
+  ret float %fmed
+}
+
+declare half @llvm.amdgcn.fmed3.f16(half, half, half)
+declare float @llvm.amdgcn.fmed3.f32(float, float, float)
+declare float @llvm.minnum.f32(float, float)
+
+attributes #0 = {"amdgpu-ieee"="true"}
+attributes #1 = {"amdgpu-ieee"="false"}
+attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
+attributes #3 = {"no-nans-fp-math"="true"}
+attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
new file mode 100644
index 000000000000..566db10ae284
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -0,0 +1,262 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define float @test_min_max_ValK0_K1_f32(float %a) #0 {
+; GFX10-LABEL: test_min_max_ValK0_K1_f32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %maxnum = call nnan float @llvm.maxnum.f32(float %fmul, float 0.0)
+  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
+  ret float %fmed
+}
+
+define double @test_min_max_K0Val_K1_f64(double %a) #1 {
+; GFX10-LABEL: test_min_max_K0Val_K1_f64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul double %a, 2.0
+  %maxnum = call nnan double @llvm.maxnum.f64(double 0.0, double %fmul)
+  %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 1.0)
+  ret double %fmed
+}
+
+; min-max patterns for ieee=true, dx10_clamp=true don't have to check for NaNs
+define half @test_min_K1max_ValK0_f16(half %a) #2 {
+; GFX10-LABEL: test_min_K1max_ValK0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul half %a, 2.0
+  %maxnum = call half @llvm.maxnum.f16(half %fmul, half 0.0)
+  %fmed = call half @llvm.minnum.f16(half 1.0, half %maxnum)
+  ret half %fmed
+}
+
+define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
+; GFX10-LABEL: test_min_K1max_K0Val_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
+  %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half 0.0>, <2 x half> %fmul)
+  %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half 1.0>, <2 x half> %maxnum)
+  ret <2 x half> %fmed
+}
+
+define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
+; GFX10-LABEL: test_min_max_splat_padded_with_undef:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
+  %maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half undef>, <2 x half> %fmul)
+  %fmed = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half undef>, <2 x half> %maxnum)
+  ret <2 x half> %fmed
+}
+
+; max-min patterns work only for known non-NaN inputs
+
+define float @test_max_min_ValK1_K0_f32(float %a) #0 {
+; GFX10-LABEL: test_max_min_ValK1_K0_f32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %minnum = call nnan float @llvm.minnum.f32(float %fmul, float 1.0)
+  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
+  ret float %fmed
+}
+
+define double @test_max_min_K1Val_K0_f64(double %a) #1 {
+; GFX10-LABEL: test_max_min_K1Val_K0_f64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul double %a, 2.0
+  %minnum = call nnan double @llvm.minnum.f64(double 1.0, double %fmul)
+  %fmed = call nnan double @llvm.maxnum.f64(double %minnum, double 0.0)
+  ret double %fmed
+}
+
+define half @test_max_K0min_ValK1_f16(half %a) #0 {
+; GFX10-LABEL: test_max_K0min_ValK1_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul half %a, 2.0
+  %minnum = call nnan half @llvm.minnum.f16(half %fmul, half 1.0)
+  %fmed = call nnan half @llvm.maxnum.f16(half 0.0, half %minnum)
+  ret half %fmed
+}
+
+; treat undef as value that will result in a constant splat
+define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
+; GFX10-LABEL: test_max_K0min_K1Val_v2f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
+  %minnum = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half undef>, <2 x half> %fmul)
+  %fmed = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> <half undef, half 0.0>, <2 x half> %minnum)
+  ret <2 x half> %fmed
+}
+
+; global nnan function attribute always forces clamp combine
+
+define float @test_min_max_global_nnan(float %a) #3 {
+; GFX10-LABEL: test_min_max_global_nnan:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
+  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+  ret float %fmed
+}
+
+define float @test_max_min_global_nnan(float %a) #3 {
+; GFX10-LABEL: test_max_min_global_nnan:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %minnum = call float @llvm.minnum.f32(float %a, float 1.0)
+  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+  ret float %fmed
+}
+
+; ------------------------------------------------------------------------------
+; Negative patterns
+; ------------------------------------------------------------------------------
+
+; min(max(Val, 1.0), 0.0), should be min(max(Val, 0.0), 1.0)
+define float @test_min_max_K0_gt_K1(float %a) #0 {
+; GFX10-LABEL: test_min_max_K0_gt_K1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 1.0)
+  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 0.0)
+  ret float %fmed
+}
+
+; max(min(Val, 0.0), 1.0), should be max(min(Val, 1.0), 0.0)
+define float @test_max_min_K0_gt_K1(float %a) #0 {
+; GFX10-LABEL: test_max_min_K0_gt_K1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_min_f32_e32 v0, 0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %minnum = call nnan float @llvm.minnum.f32(float %a, float 0.0)
+  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 1.0)
+  ret float %fmed
+}
+
+; Input that can be NaN
+
+; min-max patterns for ieee=false require known non-NaN input
+define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
+; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
+  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+  ret float %fmed
+}
+
+; clamp fails here since input can be NaN and dx10_clamp=false; fmed3 succeeds
+define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #4 {
+; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
+  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+  ret float %fmed
+}
+
+; max-min patterns always require known non-NaN input
+
+define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
+; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
+  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+  ret float %fmed
+}
+
+define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
+; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %fmul = fmul float %a, 2.0
+  %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
+  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+  ret float %fmed
+}
+
+declare half @llvm.minnum.f16(half, half)
+declare half @llvm.maxnum.f16(half, half)
+declare float @llvm.minnum.f32(float, float)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+declare double @llvm.maxnum.f64(double, double)
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
+declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
+attributes #0 = {"amdgpu-ieee"="true"}
+attributes #1 = {"amdgpu-ieee"="false"}
+attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
+attributes #3 = {"no-nans-fp-math"="true"}
+attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
new file mode 100644
index 000000000000..e1f33d0563e9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
@@ -0,0 +1,261 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: test_fmed3_f32_known_nnan_ieee_true # nnan fmed3(x * 2.0, 0.0, 1.0) on s32 with ieee and dx10-clamp enabled; expected to become G_AMDGPU_CLAMP of the G_FMUL
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_f32_known_nnan_ieee_true
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %8:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %8
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %9:vgpr(s32) = COPY %5(s32)
+    %10:vgpr(s32) = COPY %6(s32)
+    %4:vgpr(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(s32), %9(s32), %10(s32) ; operands: value, 0.0 (%9), 1.0 (%10); the nnan flag is expected to carry over to the clamp
+    $vgpr0 = COPY %4(s32)
+...
+
+---
+name: test_fmed3_f16_known_nnan_ieee_false # s16 variant: nnan fmed3(x * 2.0, 0.0, 1.0) with ieee disabled; expected to become G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_f16_known_nnan_ieee_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+    %2:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s16) = G_TRUNC %2(s32)
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %10:vgpr(s16) = COPY %3(s16)
+    %4:vgpr(s16) = G_FMUL %0, %10
+    %7:sgpr(s16) = G_FCONSTANT half 0xH3C00
+    %6:sgpr(s16) = G_FCONSTANT half 0xH0000
+    %11:vgpr(s16) = COPY %6(s16)
+    %12:vgpr(s16) = COPY %7(s16)
+    %5:vgpr(s16) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %4(s16), %11(s16), %12(s16) ; operands: value, 0xH0000 = 0.0 (%11), 0xH3C00 = 1.0 (%12)
+    %9:vgpr(s32) = G_ANYEXT %5(s16)
+    $vgpr0 = COPY %9(s32)
+...
+
+---
+name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true # fmed3 without nnan whose value operand is a G_FMINNUM_IEEE result; expected to become G_AMDGPU_CLAMP (no nnan flag)
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee:            true
+    dx10-clamp:      true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 1.000000e+01
+    %8:vgpr(s32) = G_FCANONICALIZE %0
+    %9:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMINNUM_IEEE %8, %9 ; min(canonicalize(x), 10.0) feeds the fmed3 below; per the test name this input is not a signaling NaN
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %10:vgpr(s32) = COPY %5(s32)
+    %11:vgpr(s32) = COPY %6(s32)
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(s32), %10(s32), %11(s32) ; operands: value, 0.0 (%10), 1.0 (%11)
+    $vgpr0 = COPY %4(s32)
+...
+
+---
+name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true # fmed3(x * 2.0, 1.0, 0.0) without nnan, 0.0 as the third operand; expected to become G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %8:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %8
+    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %9:vgpr(s32) = COPY %5(s32)
+    %10:vgpr(s32) = COPY %6(s32)
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(s32), %9(s32), %10(s32) ; operands: value, 1.0 (%9), 0.0 (%10) - the constants are swapped relative to the other fmed3 tests
+    $vgpr0 = COPY %4(s32)
+...
+
+# FIXME: Add tests for functions with the "no-nans-fp-math"="true" attribute (attributes #3 in the .ll tests).
+
+---
+name: test_fmed3_f32_maybe_NaN_ieee_false # negative test: fmed3(x * 2.0, 1.0, 0.0) without nnan and with ieee disabled; expected output leaves the intrinsic unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_f32_maybe_NaN_ieee_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FMUL]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %8:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %8
+    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %9:vgpr(s32) = COPY %5(s32)
+    %10:vgpr(s32) = COPY %6(s32)
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(s32), %9(s32), %10(s32) ; operands: value, 1.0 (%9), 0.0 (%10); no nnan flag
+    $vgpr0 = COPY %4(s32)
+...
+
+---
+name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false # negative test: same input as the dx10clamp_true variant but with dx10-clamp disabled; expected output leaves the intrinsic unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: false
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FMINNUM_IEEE]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 1.000000e+01
+    %8:vgpr(s32) = G_FCANONICALIZE %0
+    %9:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMINNUM_IEEE %8, %9 ; min(canonicalize(x), 10.0) feeds the fmed3 below
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %10:vgpr(s32) = COPY %5(s32)
+    %11:vgpr(s32) = COPY %6(s32)
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(s32), %10(s32), %11(s32) ; operands: value, 0.0 (%10), 1.0 (%11)
+    $vgpr0 = COPY %4(s32)
+...
+
+---
+name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true # negative test: fmed3(x * 2.0, 0.0, 1.0) without nnan on a G_FMUL result; expected output leaves the intrinsic unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FMUL]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %8:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %8
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %9:vgpr(s32) = COPY %5(s32)
+    %10:vgpr(s32) = COPY %6(s32)
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3(s32), %9(s32), %10(s32) ; operands: value, 0.0 (%9), 1.0 (%10); no nnan flag
+    $vgpr0 = COPY %4(s32)
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
new file mode 100644
index 000000000000..b7f2953f98bd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
@@ -0,0 +1,583 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_min_max_ValK0_K1_f32 # nnan min(max(x * 2.0, 0.0), 1.0) on s32 using the _IEEE opcodes; expected to become nnan G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_ValK0_K1_f32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %9:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %9
+    %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %10:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %10 ; inner max: value first, constant 0.0 second
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %11:vgpr(s32) = COPY %6(s32)
+    %7:vgpr(s32) = nnan G_FMINNUM_IEEE %5, %11 ; outer min: inner result first, constant 1.0 second
+    $vgpr0 = COPY %7(s32)
+...
+
+---
+name: test_min_max_K0Val_K1_f64 # nnan min(max(0.0, x * 2.0), 1.0) on s64 with ieee disabled; expected to become nnan G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_min_max_K0Val_K1_f64
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s64) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s64) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_CLAMP]](s64)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00
+    %13:vgpr(s64) = COPY %4(s64)
+    %5:vgpr(s64) = G_FMUL %0, %13
+    %6:sgpr(s64) = G_FCONSTANT double 0.000000e+00
+    %14:vgpr(s64) = COPY %6(s64)
+    %7:vgpr(s64) = nnan G_FMAXNUM %14, %5 ; inner max: constant 0.0 first, value second
+    %8:sgpr(s64) = G_FCONSTANT double 1.000000e+00
+    %15:vgpr(s64) = COPY %8(s64)
+    %9:vgpr(s64) = nnan G_FMINNUM %7, %15 ; outer min: inner result first, constant 1.0 second
+    $vgpr0_vgpr1 = COPY %9(s64)
+...
+
+---
+name: test_min_K1max_ValK0_f16 # min(1.0, max(canonicalize(x * 2.0), 0.0)) on s16 without nnan; expected to become G_AMDGPU_CLAMP of the G_FCANONICALIZE
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_K1max_ValK0_f16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]]
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s16) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+    %2:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s16) = G_TRUNC %2(s32)
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %12:vgpr(s16) = COPY %3(s16)
+    %4:vgpr(s16) = G_FMUL %0, %12
+    %5:sgpr(s16) = G_FCONSTANT half 0xH0000
+    %11:vgpr(s16) = G_FCANONICALIZE %4
+    %13:vgpr(s16) = COPY %5(s16)
+    %6:vgpr(s16) = G_FMAXNUM_IEEE %11, %13 ; inner max: canonicalized value first, constant 0xH0000 = 0.0 second
+    %7:sgpr(s16) = G_FCONSTANT half 0xH3C00
+    %14:vgpr(s16) = COPY %7(s16)
+    %8:vgpr(s16) = G_FMINNUM_IEEE %14, %6 ; outer min: constant 0xH3C00 = 1.0 first, inner result second
+    %10:vgpr(s32) = G_ANYEXT %8(s16)
+    $vgpr0 = COPY %10(s32)
+...
+
+---
+name: test_min_K1max_K0Val_f16 # nnan min(splat 1.0, max(splat 0.0, x * splat 2.0)); NOTE(review): despite the _f16 suffix the operands are <2 x s16>
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_K1max_K0Val_f16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %12:sgpr(s32) = G_ANYEXT %3(s16)
+    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %12(s32), %12(s32)
+    %6:sgpr(s16) = G_FCONSTANT half 0xH0000
+    %13:sgpr(s32) = G_ANYEXT %6(s16)
+    %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %13(s32), %13(s32) ; splat of 0xH0000 = 0.0 in both lanes
+    %9:sgpr(s16) = G_FCONSTANT half 0xH3C00
+    %14:sgpr(s32) = G_ANYEXT %9(s16)
+    %8:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %14(s32), %14(s32) ; splat of 0xH3C00 = 1.0 in both lanes
+    %15:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+    %4:vgpr(<2 x s16>) = G_FMUL %0, %15
+    %16:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+    %7:vgpr(<2 x s16>) = nnan G_FMAXNUM %16, %4 ; inner max: constant splat first, value second
+    %17:vgpr(<2 x s16>) = COPY %8(<2 x s16>)
+    %10:vgpr(<2 x s16>) = nnan G_FMINNUM %17, %7 ; outer min: constant splat first, inner result second
+    $vgpr0 = COPY %10(<2 x s16>)
+...
+
+---
+name: test_min_max_splat_padded_with_undef # <2 x s16> min/max whose constant vectors have an undef second lane; expected to still become G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_splat_padded_with_undef
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %17:sgpr(s32) = G_ANYEXT %3(s16)
+    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %17(s32), %17(s32)
+    %6:sgpr(s16) = G_FCONSTANT half 0xH0000
+    %18:sgpr(s32) = G_ANYEXT %6(s16)
+    %19:sgpr(s32) = G_IMPLICIT_DEF
+    %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %18(s32), %19(s32) ; lanes: 0xH0000 = 0.0, undef
+    %10:sgpr(s16) = G_FCONSTANT half 0xH3C00
+    %20:sgpr(s32) = G_ANYEXT %10(s16)
+    %9:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %20(s32), %19(s32) ; lanes: 0xH3C00 = 1.0, undef
+    %21:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+    %4:vgpr(<2 x s16>) = G_FMUL %0, %21
+    %16:vgpr(<2 x s16>) = G_FCANONICALIZE %4
+    %22:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+    %8:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %22, %16 ; inner max: constant vector first, canonicalized value second
+    %23:vgpr(<2 x s16>) = COPY %9(<2 x s16>)
+    %11:vgpr(<2 x s16>) = G_FMINNUM_IEEE %23, %8 ; outer min: constant vector first, inner result second
+    $vgpr0 = COPY %11(<2 x s16>)
+...
+
+---
+name: test_max_min_ValK1_K0_f32 # nnan max(min(x * 2.0, 1.0), 0.0) on s32 using the _IEEE opcodes; expected to become nnan G_AMDGPU_CLAMP
+machineFunctionInfo: # NOTE(review): unlike every other function in this file, legalized/regBankSelected/tracksRegLiveness are not set here, and the expected output below has no liveins line - confirm whether that is intentional
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_min_ValK1_K0_f32
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %9:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %9
+    %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %10:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %10 ; inner min: value first, constant 1.0 second
+    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %11:vgpr(s32) = COPY %6(s32)
+    %7:vgpr(s32) = nnan G_FMAXNUM_IEEE %5, %11 ; outer max: inner result first, constant 0.0 second
+    $vgpr0 = COPY %7(s32)
+...
+
+---
+name: test_max_min_K1Val_K0_f64 # nnan max(min(1.0, x * 2.0), 0.0) on s64 with ieee disabled; expected to become nnan G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_max_min_K1Val_K0_f64
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s64) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s64) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_CLAMP]](s64)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00
+    %13:vgpr(s64) = COPY %4(s64)
+    %5:vgpr(s64) = G_FMUL %0, %13
+    %6:sgpr(s64) = G_FCONSTANT double 1.000000e+00
+    %14:vgpr(s64) = COPY %6(s64)
+    %7:vgpr(s64) = nnan G_FMINNUM %14, %5 ; inner min: constant 1.0 first, value second
+    %8:sgpr(s64) = G_FCONSTANT double 0.000000e+00
+    %15:vgpr(s64) = COPY %8(s64)
+    %9:vgpr(s64) = nnan G_FMAXNUM %7, %15 ; outer max: inner result first, constant 0.0 second
+    $vgpr0_vgpr1 = COPY %9(s64)
+...
+
+---
+name: test_max_K0min_ValK1_f16 # nnan max(0.0, min(x * 2.0, 1.0)) on s16 using the _IEEE opcodes; expected to become nnan G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_K0min_ValK1_f16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+    %2:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s16) = G_TRUNC %2(s32)
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %11:vgpr(s16) = COPY %3(s16)
+    %4:vgpr(s16) = G_FMUL %0, %11
+    %5:sgpr(s16) = G_FCONSTANT half 0xH3C00
+    %12:vgpr(s16) = COPY %5(s16)
+    %6:vgpr(s16) = nnan G_FMINNUM_IEEE %4, %12 ; inner min: value first, constant 0xH3C00 = 1.0 second
+    %7:sgpr(s16) = G_FCONSTANT half 0xH0000
+    %13:vgpr(s16) = COPY %7(s16)
+    %8:vgpr(s16) = nnan G_FMAXNUM_IEEE %13, %6 ; outer max: constant 0xH0000 = 0.0 first, inner result second
+    %10:vgpr(s32) = G_ANYEXT %8(s16)
+    $vgpr0 = COPY %10(s32)
+...
+
+---
+name: test_max_K0min_K1Val_v2f16 # nnan max(K0, min(K1, x * splat 2.0)) on <2 x s16> where each constant vector has one undef lane; expected to become nnan G_AMDGPU_CLAMP
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_K0min_K1Val_v2f16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(<2 x s16>) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+    %13:sgpr(s32) = G_ANYEXT %3(s16)
+    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %13(s32), %13(s32)
+    %6:sgpr(s16) = G_FCONSTANT half 0xH3C00
+    %14:sgpr(s32) = G_ANYEXT %6(s16)
+    %15:sgpr(s32) = G_IMPLICIT_DEF
+    %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %14(s32), %15(s32) ; lanes: 0xH3C00 = 1.0, undef
+    %10:sgpr(s16) = G_FCONSTANT half 0xH0000
+    %16:sgpr(s32) = G_ANYEXT %10(s16)
+    %9:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %15(s32), %16(s32) ; lanes: undef, 0xH0000 = 0.0 (undef is in the first lane here)
+    %17:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+    %4:vgpr(<2 x s16>) = G_FMUL %0, %17
+    %18:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+    %8:vgpr(<2 x s16>) = nnan G_FMINNUM %18, %4 ; inner min: constant vector first, value second
+    %19:vgpr(<2 x s16>) = COPY %9(<2 x s16>)
+    %11:vgpr(<2 x s16>) = nnan G_FMAXNUM %19, %8 ; outer max: constant vector first, inner result second
+    $vgpr0 = COPY %11(<2 x s16>)
+...
+
+# FIXME: Add tests for functions with the "no-nans-fp-math"="true" attribute (attributes #3 in the .ll tests).
+
+---
+name: test_min_max_K0_gt_K1 # negative test: nnan min(max(x, 1.0), 0.0) - the max constant is greater than the min constant; expected output is unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_K0_gt_K1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7 ; inner max against 1.0
+    %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8 ; outer min against 0.0
+    $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_max_min_K0_gt_K1 # negative test: nnan max(min(x, 0.0), 1.0) - the max constant is greater than the min constant; expected output is unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_min_K0_gt_K1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7 ; inner min against 0.0
+    %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8 ; outer max against 1.0
+    $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_min_max_maybe_NaN_input_ieee_false # negative test: min(max(x * 2.0, 0.0), 1.0) without nnan and with ieee disabled; expected output is unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %9:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %9
+    %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %10:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMAXNUM %3, %10 ; inner max against 0.0, no nnan flag
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %11:vgpr(s32) = COPY %6(s32)
+    %7:vgpr(s32) = G_FMINNUM %5, %11 ; outer min against 1.0, no nnan flag
+    $vgpr0 = COPY %7(s32)
+...
+
+---
+name: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false # min(max(canonicalize(x * 2.0), 0.0), 1.0) with dx10-clamp disabled; expected output is G_AMDGPU_FMED3, not a clamp
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: false
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %10:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %10
+    %4:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %9:vgpr(s32) = G_FCANONICALIZE %3
+    %11:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMAXNUM_IEEE %9, %11 ; inner max against 0.0, no nnan flag
+    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %12:vgpr(s32) = COPY %6(s32)
+    %7:vgpr(s32) = G_FMINNUM_IEEE %5, %12 ; outer min against 1.0, no nnan flag
+    $vgpr0 = COPY %7(s32)
+...
+
+---
+name: test_max_min_maybe_NaN_input_ieee_true # negative test: max(min(canonicalize(x * 2.0), 1.0), 0.0) without nnan; expected output is unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_true
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %10:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %10
+    %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %9:vgpr(s32) = G_FCANONICALIZE %3
+    %11:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMINNUM_IEEE %9, %11 ; inner min against 1.0, no nnan flag
+    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %12:vgpr(s32) = COPY %6(s32)
+    %7:vgpr(s32) = G_FMAXNUM_IEEE %5, %12 ; outer max against 0.0, no nnan flag
+    $vgpr0 = COPY %7(s32)
+...
+
+---
+name: test_max_min_maybe_NaN_input_ieee_false # negative test: max(min(x * 2.0, 1.0), 0.0) without nnan and with ieee disabled; expected output is unchanged
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    dx10-clamp: true
+body: |
+  bb.1 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_false
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    %0:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+    %9:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_FMUL %0, %9
+    %4:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+    %10:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_FMINNUM %3, %10 ; inner min against 1.0, no nnan flag
+    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+    %11:vgpr(s32) = COPY %6(s32)
+    %7:vgpr(s32) = G_FMAXNUM %5, %11 ; outer max against 0.0, no nnan flag
+    $vgpr0 = COPY %7(s32)
+...