[llvm] [llvm][GISel] Use computeKnownFPClass (PR #141484)
Tim Gymnich via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 07:36:38 PDT 2025
https://github.com/tgymnich updated https://github.com/llvm/llvm-project/pull/141484
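For readers skimming the patch: the functional change is that GlobalISel's isKnownNeverNaN / isKnownNeverSNaN helpers now delegate to GISelValueTracking::computeKnownFPClass instead of pattern-matching opcodes, so every caller has to pass a GISelValueTracking pointer. A minimal sketch of the updated call pattern (illustrative only, not part of the patch; it assumes the caller already has a GISelValueTracking *VT available, as CombinerHelper and the AMDGPU combiners do after this change):

  #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
  #include "llvm/CodeGen/GlobalISel/Utils.h"

  using namespace llvm;

  // Hypothetical helper showing the new three-argument signature; the
  // isKnownNeverSNaN wrapper takes the same extra GISelValueTracking argument.
  static bool operandsAreNaNFree(Register LHS, Register RHS,
                                 const MachineRegisterInfo &MRI,
                                 GISelValueTracking *VT) {
    // isKnownNeverNaN now forwards the query to VT->computeKnownFPClass.
    return isKnownNeverNaN(LHS, MRI, VT) && isKnownNeverNaN(RHS, MRI, VT);
  }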
>From 3b299a1561a92abaacad0907b62e63b85679ccf3 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:33:06 +0000
Subject: [PATCH 1/2] replace isKnownNeverNaN impl
---
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 7 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4 +-
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 145 +++++++++++++++++-
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 +-
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 78 +---------
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 +-
.../Target/AMDGPU/AMDGPURegBankCombiner.cpp | 12 +-
7 files changed, 163 insertions(+), 89 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 66c960fe12c68..5f79bc3d8cd02 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -346,11 +346,12 @@ isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
/// this returns if \p Val can be assumed to never be a signaling NaN.
LLVM_ABI bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
- bool SNaN = false);
+ GISelValueTracking *ValueTracking, bool SNaN = false);
/// Returns true if \p Val can be assumed to never be a signaling NaN.
-inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
- return isKnownNeverNaN(Val, MRI, true);
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI,
+ GISelValueTracking *ValueTracking) {
+ return isKnownNeverNaN(Val, MRI, ValueTracking, true);
}
LLVM_ABI Align inferAlignFromPtrInfo(MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..8952226ae7f1e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6519,8 +6519,8 @@ unsigned CombinerHelper::getFPMinMaxOpcForSelect(
CombinerHelper::SelectPatternNaNBehaviour
CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
bool IsOrderedComparison) const {
- bool LHSSafe = isKnownNeverNaN(LHS, MRI);
- bool RHSSafe = isKnownNeverNaN(RHS, MRI);
+ bool LHSSafe = isKnownNeverNaN(LHS, MRI, VT);
+ bool RHSSafe = isKnownNeverNaN(RHS, MRI, VT);
// Completely unsafe.
if (!LHSSafe && !RHSSafe)
return SelectPatternNaNBehaviour::NOT_APPLICABLE;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 1286af864fb3f..c55f543d30d7a 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -937,8 +937,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownSrc.isKnownNeverPosInfinity())
Known.knownNot(fcPosInf);
- if (KnownSrc.isKnownNever(fcSNan))
- Known.knownNot(fcSNan);
// Any negative value besides -0 returns a nan.
if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
@@ -961,6 +959,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
}
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FSINCOS: {
// Return NaN on infinite inputs.
Register Val = MI.getOperand(1).getReg();
@@ -968,18 +967,19 @@ void GISelValueTracking::computeKnownFPClass(Register R,
computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
Depth + 1);
+
Known.knownNot(fcInf);
if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
Known.knownNot(fcNan);
break;
}
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXIMUM:
case TargetOpcode::G_FMINIMUM:
- case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FMINIMUMNUM: {
Register LHS = MI.getOperand(1).getReg();
@@ -994,6 +994,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
Known = KnownLHS | KnownRHS;
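+    // G_FMINNUM_IEEE / G_FMAXNUM_IEEE quiet a signaling NaN operand, so the
+    // result can never itself be an sNaN.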
+ if (Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
+ Opcode == TargetOpcode::G_FMINNUM_IEEE)
+ Known.knownNot(fcSNan);
+
// If either operand is not NaN, the result is not NaN.
if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
Opcode == TargetOpcode::G_FMAXNUM ||
@@ -1001,6 +1005,12 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Opcode == TargetOpcode::G_FMAXIMUMNUM))
Known.knownNot(fcNan);
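+    // The IEEE variants return NaN only when both operands are NaN or when
+    // one of them is a signaling NaN.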
+ if ((Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
+ Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
+ ((KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNever(fcSNan)) ||
+ (KnownLHS.isKnownNever(fcSNan) && KnownRHS.isKnownNeverNaN())))
+ Known.knownNot(fcNan);
+
if (Opcode == TargetOpcode::G_FMAXNUM ||
Opcode == TargetOpcode::G_FMAXIMUMNUM ||
Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
@@ -1089,6 +1099,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FCANONICALIZE: {
Register Val = MI.getOperand(1).getReg();
KnownFPClass KnownSrc;
+
computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
Depth + 1);
@@ -1190,6 +1201,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownSrc.isKnownNeverNaN()) {
Known.knownNot(fcNan);
Known.signBitMustBeZero();
+ } else {
+ Known.knownNot(fcSNan);
}
break;
@@ -1300,6 +1313,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
KnownFPClass KnownLHS, KnownRHS;
+
bool WantNegative =
(Opcode == TargetOpcode::G_FADD ||
Opcode == TargetOpcode::G_STRICT_FADD) &&
@@ -1364,6 +1378,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_STRICT_FMUL: {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
+
// X * X is always non-negative or a NaN.
if (LHS == RHS)
Known.knownNot(fcNegative);
@@ -1494,6 +1509,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register Src = MI.getOperand(1).getReg();
// Infinity, nan and zero propagate from source.
computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1);
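+    // The conversion quiets any signaling NaN coming from the source.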
+ Known.knownNot(fcSNan);
LLT DstTy = MRI.getType(Dst).getScalarType();
const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
@@ -1517,6 +1533,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FPTRUNC: {
computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
Depth);
+ Known.knownNot(fcSNan);
break;
}
case TargetOpcode::G_SITOFP:
@@ -1698,6 +1715,126 @@ void GISelValueTracking::computeKnownFPClass(Register R,
computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
break;
}
+ case TargetOpcode::G_FATAN: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+    // atan(+-inf) -> +-pi/2, so an always-infinite source still yields a
+    // finite result.
+    if (KnownSrc.isKnownAlways(fcInf))
+      Known.KnownFPClasses = fcFinite;
+
+ break;
+ }
+ case TargetOpcode::G_FATAN2: {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownFPClass KnownLHS;
+ KnownFPClass KnownRHS;
+
+ computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
+ Depth + 1);
+
+ computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
+ Depth + 1);
+
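+    // The special-case results below all assume neither input can be NaN.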
+    if (!KnownLHS.isKnownNeverNaN() || !KnownRHS.isKnownNeverNaN())
+ break;
+
+ if (KnownLHS.isKnownAlways(fcZero)) {
+      // atan2(+-0, -0) -> +-pi
+ // atan2(+-0, x) -> +-pi for x < 0
+ if (KnownRHS.isKnownAlways(fcNegFinite)) {
+ Known.KnownFPClasses = fcFinite;
+ break;
+ }
+
+ // atan2(+-0, +0) -> +-0
+ // atan2(+-0, x) -> +-0 for x > 0
+ if (KnownRHS.isKnownAlways(fcPosFinite)) {
+ Known.KnownFPClasses = fcZero;
+ break;
+ }
+ }
+
+ if (KnownRHS.isKnownAlways(fcZero)) {
+ // atan2(y, +-0) -> -pi/2 for y < 0
+ if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcNegFinite)) {
+ Known.KnownFPClasses = fcNegFinite;
+ break;
+ }
+
+ // atan2(y, +-0) -> +pi/2 for y > 0
+ if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcPosFinite)) {
+ Known.KnownFPClasses = fcPosFinite;
+ break;
+ }
+ }
+
+ if (KnownLHS.isKnownAlways(fcPosFinite) && KnownLHS.isKnownNeverZero()) {
+ // atan2(+-y, -inf) -> +-pi for finite y > 0
+ if (KnownRHS.isKnownAlways(fcNegInf)) {
+ Known.KnownFPClasses = fcFinite;
+ break;
+ }
+
+ // atan2(+-y, +inf) -> +-0 for finite y > 0
+ if (KnownRHS.isKnownAlways(fcPosInf)) {
+ Known.KnownFPClasses = fcZero;
+ break;
+ }
+ }
+
+ if (KnownLHS.isKnownAlways(fcInf)) {
+ // atan2(+-inf, x) -> +-pi/2 for finite x
+ // atan2(+-inf, -inf) -> +-3pi/4
+ // atan2(+-inf, +inf) -> +-pi/4
+ Known.KnownFPClasses = fcFinite;
+ break;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_FCOSH: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+ // cosh(+-inf) -> +inf
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcPosInf;
+
+ break;
+ }
+ case TargetOpcode::G_FSINH: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+    // sinh(+-inf) -> +-inf
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcInf;
+
+ break;
+ }
+ case TargetOpcode::G_FTANH: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+ // tanh(+-inf) is +-1
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcFinite;
+
+ break;
+ }
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 028bffd1bf5a7..6ff35c330c234 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8179,10 +8179,10 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
// Note this must be done here, and not as an optimization combine in the
// absence of a dedicate quiet-snan instruction as we're using an
// omni-purpose G_FCANONICALIZE.
- if (!isKnownNeverSNaN(Src0, MRI))
+ if (!isKnownNeverSNaN(Src0, MRI, VT))
Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
- if (!isKnownNeverSNaN(Src1, MRI))
+ if (!isKnownNeverSNaN(Src1, MRI, VT))
Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 2584175121d63..95740e78fe8ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -807,7 +808,7 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
}
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
- bool SNaN) {
+ GISelValueTracking *VT, bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
if (!DefMI)
return false;
@@ -816,78 +817,11 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
- // If the value is a constant, we can obviously see if it is a NaN or not.
- if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
- return !FPVal->getValueAPF().isNaN() ||
- (SNaN && !FPVal->getValueAPF().isSignaling());
- }
-
- if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
- for (const auto &Op : DefMI->uses())
- if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
- return false;
- return true;
- }
+ KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan);
+ if (SNaN)
+ return FPClass.isKnownNever(fcSNan);
- switch (DefMI->getOpcode()) {
- default:
- break;
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FREM:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FTAN:
- case TargetOpcode::G_FACOS:
- case TargetOpcode::G_FASIN:
- case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
- case TargetOpcode::G_FCOSH:
- case TargetOpcode::G_FSINH:
- case TargetOpcode::G_FTANH:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FMAD:
- if (SNaN)
- return true;
-
- // TODO: Need isKnownNeverInfinity
- return false;
- case TargetOpcode::G_FMINNUM_IEEE:
- case TargetOpcode::G_FMAXNUM_IEEE: {
- if (SNaN)
- return true;
- // This can return a NaN if either operand is an sNaN, or if both operands
- // are NaN.
- return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
- isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
- (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
- isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
- }
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXNUM: {
- // Only one needs to be known not-nan, since it will be returned if the
- // other ends up being one.
- return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
- isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
- }
- }
-
- if (SNaN) {
- // FP operations quiet. For now, just handle the ones inserted during
- // legalization.
- switch (DefMI->getOpcode()) {
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCANONICALIZE:
- return true;
- default:
- return false;
- }
- }
-
- return false;
+ return FPClass.isKnownNeverNaN();
}
Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 18a948d68e97b..2a6073c20c73b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -859,7 +859,7 @@ class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
return CurDAG->isKnownNeverNaN(SDValue(N,0));
}];
let GISelPredicateCode = [{
- return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
+ return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT);
}];
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index ee324a5e93f0f..590731d508ce3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -261,7 +261,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
// nodes(max/min) have same behavior when one input is NaN and other isn't.
// Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
// also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
- if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
+ if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI, VT)) {
// Don't fold single use constant that can't be inlined.
if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
(!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
@@ -291,8 +291,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
// For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
// to 0.0 requires dx10_clamp = true.
if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
- isKnownNeverSNaN(Val, MRI)) ||
- isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
+ isKnownNeverSNaN(Val, MRI, VT)) ||
+ isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT)) {
Reg = Val;
return true;
}
@@ -329,6 +329,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
Register Val = Src0->getOperand(0).getReg();
auto isOp3Zero = [&]() {
+ if (MI.getNumOperands() < 5)
+ return false;
MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
@@ -338,9 +340,9 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
// no NaN inputs. Most often MI is marked with nnan fast math flag.
// For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
// when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
- if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
+ if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT) ||
(getIEEE() && getDX10Clamp() &&
- (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
+ (isKnownNeverSNaN(Val, MRI, VT) || isOp3Zero()))) {
Reg = Val;
return true;
}
>From afcba08a2c792ffa2bea81a1d6100e450622575f Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:35:57 +0000
Subject: [PATCH 2/2] update tests
---
.../GlobalISel/clamp-fmed3-const-combine.ll | 18 ++++++---
.../GlobalISel/clamp-minmax-const-combine.ll | 32 ++++++---------
.../GlobalISel/fmed3-min-max-const-combine.ll | 22 ++++------
.../regbankcombiner-clamp-fmed3-const.mir | 40 ++++++++++++++-----
.../regbankcombiner-clamp-minmax-const.mir | 35 ++++------------
.../regbankcombiner-fmed3-minmax-const.mir | 15 +++----
6 files changed, 78 insertions(+), 84 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
index ef88a2be47872..fb33d3dbfc336 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -134,7 +137,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +177,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +188,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index ab0de89d3e4e3..2955fb1f24f8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -95,7 +95,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
+; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +107,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
+; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
%maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
@@ -305,9 +310,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -330,8 +333,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
@@ -355,9 +357,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -367,9 +367,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
@@ -381,9 +379,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -393,9 +389,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index cf0547e112a6b..df8573048d252 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -456,15 +456,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +487,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -509,7 +505,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -522,16 +518,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -543,7 +537,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
index a97d905f2a978..129cbcfca6fa5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
@@ -162,8 +162,12 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
;
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
; GFX12: liveins: $vgpr0
@@ -172,8 +176,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
@@ -222,8 +230,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
@@ -307,8 +319,12 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
;
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
; GFX12: liveins: $vgpr0
@@ -317,8 +333,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
index 70fd67363648d..c3eed50092056 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
@@ -441,13 +441,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%9:vgpr(s32) = COPY %2(s32)
@@ -481,13 +476,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%10:vgpr(s32) = COPY %2(s32)
@@ -522,14 +513,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%10:vgpr(s32) = COPY %2(s32)
@@ -564,13 +550,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%9:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
index 2f41d86100040..a53e97af0d028 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
@@ -469,11 +469,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%7:vgpr(s32) = COPY %2(s32)
@@ -502,11 +501,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
%7:vgpr(s32) = COPY %2(s32)
@@ -536,11 +534,10 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
%7:vgpr(s32) = G_FCANONICALIZE %0