[llvm] [GISel] Use GISelValueTracking in isKnownNeverNaN (PR #190542)

Tim Gymnich via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 5 11:05:58 PDT 2026


https://github.com/tgymnich updated https://github.com/llvm/llvm-project/pull/190542

>From 1e64a21fb259481f1824526a32ead6a4cf763a0b Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Sat, 4 Apr 2026 19:14:56 -0500
Subject: [PATCH] [GISel] isKnownNeverNaN: thread GISelValueTracking for
 sharper analysis

Pass GISelValueTracking* through isKnownNeverNaN and isKnownNeverSNaN so
that the implementation can call computeKnownFPClass to derive NaN
information from value tracking, rather than only looking at flags and
direct constant definitions. Update all callers.

Co-Authored-By: Claude Sonnet 4.6 <noreply at anthropic.com>
---
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h  |  6 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  4 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  6 +-
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         | 77 ++-----------------
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  2 +-
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   | 10 +--
 llvm/test/CodeGen/AArch64/known-never-nan.ll  | 36 ++++++---
 .../GlobalISel/clamp-minmax-const-combine.ll  | 31 ++------
 .../GlobalISel/fmed3-min-max-const-combine.ll | 26 +++----
 .../regbankcombiner-clamp-minmax-const.mir    | 35 ++-------
 .../regbankcombiner-fmed3-minmax-const.mir    | 15 ++--
 11 files changed, 75 insertions(+), 173 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index de6606e7ed14a..0ffec025b7e36 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -340,12 +340,12 @@ isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
 
 /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
 /// this returns if \p Val can be assumed to never be a signaling NaN.
-LLVM_ABI bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+LLVM_ABI bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *VT,
                               bool SNaN = false);
 
 /// Returns true if \p Val can be assumed to never be a signaling NaN.
-inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
-  return isKnownNeverNaN(Val, MRI, true);
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *VT) {
+  return isKnownNeverNaN(Val, MRI, VT, true);
 }
 
 LLVM_ABI Align inferAlignFromPtrInfo(MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 177170575fe07..cfeefdb906f71 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7093,8 +7093,8 @@ unsigned CombinerHelper::getFPMinMaxOpcForSelect(
 CombinerHelper::SelectPatternNaNBehaviour
 CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
                                         bool IsOrderedComparison) const {
-  bool LHSSafe = isKnownNeverNaN(LHS, MRI);
-  bool RHSSafe = isKnownNeverNaN(RHS, MRI);
+  bool LHSSafe = isKnownNeverNaN(LHS, MRI, VT);
+  bool RHSSafe = isKnownNeverNaN(RHS, MRI, VT);
   // Completely unsafe.
   if (!LHSSafe && !RHSSafe)
     return SelectPatternNaNBehaviour::NOT_APPLICABLE;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ac3d1f62ce9cc..b865bcad92444 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8968,10 +8968,10 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
     // Note this must be done here, and not as an optimization combine in the
     // absence of a dedicate quiet-snan instruction as we're using an
     // omni-purpose G_FCANONICALIZE.
-    if (!isKnownNeverSNaN(Src0, MRI))
+    if (!isKnownNeverSNaN(Src0, MRI, VT))
       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
 
-    if (!isKnownNeverSNaN(Src1, MRI))
+    if (!isKnownNeverSNaN(Src1, MRI, VT))
       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
   }
 
@@ -9011,7 +9011,7 @@ LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
 
   // Propagate any NaN of both operands
   if (!MI.getFlag(MachineInstr::FmNoNans) &&
-      (!isKnownNeverNaN(Src0, MRI) || isKnownNeverNaN(Src1, MRI))) {
+      (!isKnownNeverNaN(Src0, MRI, VT) || isKnownNeverNaN(Src1, MRI, VT))) {
     auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
 
     LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index d019633369163..96e2c9bbbf002 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -833,7 +833,7 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
   return FoldedElements;
 }
 
-bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *VT,
                            bool SNaN) {
   const MachineInstr *DefMI = MRI.getVRegDef(Val);
   if (!DefMI)
@@ -842,78 +842,11 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
   if (DefMI->getFlag(MachineInstr::FmNoNans))
     return true;
 
-  // If the value is a constant, we can obviously see if it is a NaN or not.
-  if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
-    return !FPVal->getValueAPF().isNaN() ||
-           (SNaN && !FPVal->getValueAPF().isSignaling());
-  }
-
-  if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
-    for (const auto &Op : DefMI->uses())
-      if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
-        return false;
-    return true;
-  }
+  KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan);
+  if (SNaN)
+    return FPClass.isKnownNever(fcSNan);
 
-  switch (DefMI->getOpcode()) {
-  default:
-    break;
-  case TargetOpcode::G_FADD:
-  case TargetOpcode::G_FSUB:
-  case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_FREM:
-  case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FCOS:
-  case TargetOpcode::G_FTAN:
-  case TargetOpcode::G_FACOS:
-  case TargetOpcode::G_FASIN:
-  case TargetOpcode::G_FATAN:
-  case TargetOpcode::G_FATAN2:
-  case TargetOpcode::G_FCOSH:
-  case TargetOpcode::G_FSINH:
-  case TargetOpcode::G_FTANH:
-  case TargetOpcode::G_FMA:
-  case TargetOpcode::G_FMAD:
-    if (SNaN)
-      return true;
-
-    // TODO: Need isKnownNeverInfinity
-    return false;
-  case TargetOpcode::G_FMINNUM_IEEE:
-  case TargetOpcode::G_FMAXNUM_IEEE: {
-    if (SNaN)
-      return true;
-    // This can return a NaN if either operand is an sNaN, or if both operands
-    // are NaN.
-    return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
-            isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
-           (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
-            isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
-  }
-  case TargetOpcode::G_FMINNUM:
-  case TargetOpcode::G_FMAXNUM: {
-    // Only one needs to be known not-nan, since it will be returned if the
-    // other ends up being one.
-    return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
-           isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
-  }
-  }
-
-  if (SNaN) {
-    // FP operations quiet. For now, just handle the ones inserted during
-    // legalization.
-    switch (DefMI->getOpcode()) {
-    case TargetOpcode::G_FPEXT:
-    case TargetOpcode::G_FPTRUNC:
-    case TargetOpcode::G_FCANONICALIZE:
-      return true;
-    default:
-      return false;
-    }
-  }
-
-  return false;
+  return FPClass.isKnownNeverNaN();
 }
 
 Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 9b6443a99ac55..3bea58ce95f0c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -844,7 +844,7 @@ class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
     return CurDAG->isKnownNeverNaN(SDValue(N,0));
   }];
   let GISelPredicateCode = [{
-    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
+    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT);
   }];
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 0e1d4075ed92e..413d6cb4c1e4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -265,7 +265,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
   // nodes(max/min) have same behavior when one input is NaN and other isn't.
   // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
   // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
-  if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
+  if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI, VT)) {
     // Don't fold single use constant that can't be inlined.
     if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
         (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
@@ -295,8 +295,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
   // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
   // to 0.0 requires dx10_clamp = true.
   if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
-       isKnownNeverSNaN(Val, MRI)) ||
-      isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
+       isKnownNeverSNaN(Val, MRI, VT)) ||
+      isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT)) {
     Reg = Val;
     return true;
   }
@@ -342,9 +342,9 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
   // no NaN inputs. Most often MI is marked with nnan fast math flag.
   // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
   // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
-  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
+  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT) ||
       (getIEEE() && getDX10Clamp() &&
-       (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
+       (isKnownNeverSNaN(Val, MRI, VT) || isOp3Zero()))) {
     Reg = Val;
     return true;
   }
diff --git a/llvm/test/CodeGen/AArch64/known-never-nan.ll b/llvm/test/CodeGen/AArch64/known-never-nan.ll
index bd080e29890e2..569512fe475f8 100644
--- a/llvm/test/CodeGen/AArch64/known-never-nan.ll
+++ b/llvm/test/CodeGen/AArch64/known-never-nan.ll
@@ -27,18 +27,30 @@ define float @fmaxnm(i32 %i1, i32 %i2) #0 {
 ; If f1 is 0, fmul is NaN because 0.0 * -INF = NaN
 ; Therefore, this is not fmaxnm.
 define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 {
-; CHECK-LABEL: not_fmaxnm_maybe_nan:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf s0, w0
-; CHECK-NEXT:    ucvtf s1, w1
-; CHECK-NEXT:    mov w8, #-8388608 // =0xff800000
-; CHECK-NEXT:    fmov s2, #17.00000000
-; CHECK-NEXT:    fmov s3, w8
-; CHECK-NEXT:    fmul s0, s0, s3
-; CHECK-NEXT:    fadd s1, s1, s2
-; CHECK-NEXT:    fcmp s0, s1
-; CHECK-NEXT:    fcsel s0, s0, s1, pl
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: not_fmaxnm_maybe_nan:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf s0, w0
+; CHECK-SD-NEXT:    ucvtf s1, w1
+; CHECK-SD-NEXT:    mov w8, #-8388608 // =0xff800000
+; CHECK-SD-NEXT:    fmov s2, #17.00000000
+; CHECK-SD-NEXT:    fmov s3, w8
+; CHECK-SD-NEXT:    fmul s0, s0, s3
+; CHECK-SD-NEXT:    fadd s1, s1, s2
+; CHECK-SD-NEXT:    fcmp s0, s1
+; CHECK-SD-NEXT:    fcsel s0, s0, s1, pl
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: not_fmaxnm_maybe_nan:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf s0, w0
+; CHECK-GI-NEXT:    ucvtf s1, w1
+; CHECK-GI-NEXT:    mov w8, #-8388608 // =0xff800000
+; CHECK-GI-NEXT:    fmov s2, #17.00000000
+; CHECK-GI-NEXT:    fmov s3, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s3
+; CHECK-GI-NEXT:    fadd s1, s1, s2
+; CHECK-GI-NEXT:    fmax s0, s0, s1
+; CHECK-GI-NEXT:    ret
   %f1 = uitofp i32 %i1 to float
   %fmul = fmul float %f1, 0xfff0000000000000 ; -INFINITY as 64-bit hex
   %f2 = uitofp i32 %i2 to float
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 8fd41241b7ae4..9e708c4b0a7a3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -384,9 +384,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -415,8 +413,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
@@ -446,17 +443,13 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 ; GFX1170:       ; %bb.0:
 ; GFX1170-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1170-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX1170-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX1170-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -466,9 +459,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
@@ -480,17 +471,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX1170:       ; %bb.0:
 ; GFX1170-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1170-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX1170-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX1170-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -500,9 +487,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_mul_f32_e32 v0, 2.0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %fmul = fmul float %a, 2.0
   %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index dbcd4eaa2d71d..2677e2e8005d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -553,15 +553,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -594,15 +592,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -610,7 +606,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX1170-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1170-NEXT:    v_max_num_f32_e32 v0, v0, v0
 ; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX1170-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX1170-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -622,7 +618,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -635,16 +631,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -652,7 +646,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
 ; GFX1170-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1170-NEXT:    v_max_num_f32_e32 v0, v0, v0
 ; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX1170-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX1170-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -664,7 +658,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
index 70fd67363648d..c3eed50092056 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
@@ -441,13 +441,8 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %9:vgpr(s32) = COPY %2(s32)
@@ -481,13 +476,9 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
     ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
-    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %10:vgpr(s32) = COPY %2(s32)
@@ -522,14 +513,9 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
     ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
-    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %10:vgpr(s32) = COPY %2(s32)
@@ -564,13 +550,8 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %9:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
index 2f41d86100040..a53e97af0d028 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
@@ -469,11 +469,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     %7:vgpr(s32) = COPY %2(s32)
@@ -502,11 +501,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     %7:vgpr(s32) = COPY %2(s32)
@@ -536,11 +534,10 @@ body: |
     ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
     %0:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
     %7:vgpr(s32) = G_FCANONICALIZE %0



More information about the llvm-commits mailing list