[llvm] [llvm][GISel] Use computeKnownFPClass (PR #141484)
Tim Gymnich via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 06:46:04 PDT 2025
https://github.com/tgymnich updated https://github.com/llvm/llvm-project/pull/141484
>From 075d8cbe575465f1bd2ee142b478213051350b1f Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 14:47:18 +0000
Subject: [PATCH 1/7] guard against non-virtual registers
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 42 +++++++++++--------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 67b1a449f8483..f1e77d813f0df 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -693,6 +693,9 @@ static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
FPClassTest InterestedClasses,
unsigned Depth) {
+ if (!R.isVirtual())
+ return;
+
LLT Ty = MRI.getType(R);
APInt DemandedElts =
Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
@@ -736,6 +739,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ if (!R.isVirtual())
+ return;
+
MachineInstr &MI = *MRI.getVRegDef(R);
unsigned Opcode = MI.getOpcode();
LLT DstTy = MRI.getType(R);
@@ -1024,7 +1030,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
//
if ((Known.KnownFPClasses & fcZero) != fcNone &&
!Known.isKnownNeverSubnormal()) {
- DenormalMode Mode = MF->getDenormalMode(getFltSemanticForLLT(DstTy));
+ DenormalMode Mode = MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
if (Mode != DenormalMode::getIEEE())
Known.KnownFPClasses |= fcZero;
}
@@ -1086,8 +1092,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
// If the parent function flushes denormals, the canonical output cannot
// be a denormal.
- LLT Ty = MRI.getType(Val);
- const fltSemantics &FPType = getFltSemanticForLLT(Ty.getScalarType());
+ LLT Ty = MRI.getType(Val).getScalarType();
+ const fltSemantics &FPType = getFltSemanticForLLT(Ty);
DenormalMode DenormMode = MF->getDenormalMode(FPType);
if (DenormMode == DenormalMode::getIEEE()) {
if (KnownSrc.isKnownNever(fcPosZero))
@@ -1197,8 +1203,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
Known.knownNot(fcNan);
- LLT Ty = MRI.getType(Val);
- const fltSemantics &FltSem = getFltSemanticForLLT(Ty.getScalarType());
+ LLT Ty = MRI.getType(Val).getScalarType();
+ const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
DenormalMode Mode = MF->getDenormalMode(FltSem);
if (KnownSrc.isKnownNeverLogicalZero(Mode))
@@ -1317,18 +1323,18 @@ void GISelValueTracking::computeKnownFPClass(Register R,
// (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
if ((KnownLHS.isKnownNeverLogicalNegZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy))) ||
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))) ||
KnownRHS.isKnownNeverLogicalNegZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) &&
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
// Make sure output negative denormal can't flush to -0
outputDenormalIsIEEEOrPosZero(*MF, DstTy))
Known.knownNot(fcNegZero);
} else {
// Only fsub -0, +0 can return -0
if ((KnownLHS.isKnownNeverLogicalNegZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy))) ||
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))) ||
KnownRHS.isKnownNeverLogicalPosZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) &&
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
// Make sure output negative denormal can't flush to -0
outputDenormalIsIEEEOrPosZero(*MF, DstTy))
Known.knownNot(fcNegZero);
@@ -1375,10 +1381,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if ((KnownRHS.isKnownNeverInfinity() ||
KnownLHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) &&
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
(KnownLHS.isKnownNeverInfinity() ||
KnownRHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))))
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
Known.knownNot(fcNan);
break;
@@ -1431,9 +1437,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
(KnownLHS.isKnownNeverInfinity() ||
KnownRHS.isKnownNeverInfinity()) &&
((KnownLHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) ||
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) ||
(KnownRHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))))) {
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))) {
Known.knownNot(fcNan);
}
@@ -1447,7 +1453,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
KnownLHS.isKnownNeverInfinity() &&
KnownRHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy)))) {
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) {
Known.knownNot(fcNan);
}
@@ -1472,10 +1478,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
// Infinity, nan and zero propagate from source.
computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1);
- LLT DstTy = MRI.getType(Dst);
- const fltSemantics &DstSem = getFltSemanticForLLT(DstTy.getScalarType());
- LLT SrcTy = MRI.getType(Src);
- const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy.getScalarType());
+ LLT DstTy = MRI.getType(Dst).getScalarType();
+ const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
+ LLT SrcTy = MRI.getType(Src).getScalarType();
+ const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);
// All subnormal inputs should be in the normal range in the result type.
if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
>From d96397c3e074501ee1c8028332d67077f3f8a8bc Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:33:06 +0000
Subject: [PATCH 2/7] replace isKnownNeverNaN impl
---
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 6 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4 +-
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 +-
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 82 ++-----------------
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 +-
.../Target/AMDGPU/AMDGPURegBankCombiner.cpp | 12 +--
6 files changed, 23 insertions(+), 87 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 684a9bf554fb1..503f61216d9e6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -336,12 +336,12 @@ bool isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
/// this returns if \p Val can be assumed to never be a signaling NaN.
-bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *ValueTracking,
bool SNaN = false);
/// Returns true if \p Val can be assumed to never be a signaling NaN.
-inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
- return isKnownNeverNaN(Val, MRI, true);
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *ValueTracking) {
+ return isKnownNeverNaN(Val, MRI, ValueTracking, true);
}
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..8952226ae7f1e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6519,8 +6519,8 @@ unsigned CombinerHelper::getFPMinMaxOpcForSelect(
CombinerHelper::SelectPatternNaNBehaviour
CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
bool IsOrderedComparison) const {
- bool LHSSafe = isKnownNeverNaN(LHS, MRI);
- bool RHSSafe = isKnownNeverNaN(RHS, MRI);
+ bool LHSSafe = isKnownNeverNaN(LHS, MRI, VT);
+ bool RHSSafe = isKnownNeverNaN(RHS, MRI, VT);
// Completely unsafe.
if (!LHSSafe && !RHSSafe)
return SelectPatternNaNBehaviour::NOT_APPLICABLE;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7b18a98d7f3ca..e242df04a5d80 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8179,10 +8179,10 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
// Note this must be done here, and not as an optimization combine in the
// absence of a dedicate quiet-snan instruction as we're using an
// omni-purpose G_FCANONICALIZE.
- if (!isKnownNeverSNaN(Src0, MRI))
+ if (!isKnownNeverSNaN(Src0, MRI, VT))
Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
- if (!isKnownNeverSNaN(Src1, MRI))
+ if (!isKnownNeverSNaN(Src1, MRI, VT))
Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 64af7a57e8d12..98594edb70f71 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -806,7 +807,7 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
return FoldedElements;
}
-bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *VT,
bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
if (!DefMI)
@@ -815,79 +816,12 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
const TargetMachine& TM = DefMI->getMF()->getTarget();
if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
-
- // If the value is a constant, we can obviously see if it is a NaN or not.
- if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) {
- return !FPVal->getValueAPF().isNaN() ||
- (SNaN && !FPVal->getValueAPF().isSignaling());
- }
-
- if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
- for (const auto &Op : DefMI->uses())
- if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN))
- return false;
- return true;
- }
-
- switch (DefMI->getOpcode()) {
- default:
- break;
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FREM:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FTAN:
- case TargetOpcode::G_FACOS:
- case TargetOpcode::G_FASIN:
- case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
- case TargetOpcode::G_FCOSH:
- case TargetOpcode::G_FSINH:
- case TargetOpcode::G_FTANH:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FMAD:
- if (SNaN)
- return true;
-
- // TODO: Need isKnownNeverInfinity
- return false;
- case TargetOpcode::G_FMINNUM_IEEE:
- case TargetOpcode::G_FMAXNUM_IEEE: {
- if (SNaN)
- return true;
- // This can return a NaN if either operand is an sNaN, or if both operands
- // are NaN.
- return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) &&
- isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) ||
- (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) &&
- isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI));
- }
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXNUM: {
- // Only one needs to be known not-nan, since it will be returned if the
- // other ends up being one.
- return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) ||
- isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN);
- }
- }
-
- if (SNaN) {
- // FP operations quiet. For now, just handle the ones inserted during
- // legalization.
- switch (DefMI->getOpcode()) {
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCANONICALIZE:
- return true;
- default:
- return false;
- }
- }
-
- return false;
+
+ KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan);
+ if (SNaN)
+ return FPClass.isKnownNever(fcSNan);
+
+ return FPClass.isKnownNeverNaN();
}
Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 18a948d68e97b..2a6073c20c73b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -859,7 +859,7 @@ class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
return CurDAG->isKnownNeverNaN(SDValue(N,0));
}];
let GISelPredicateCode = [{
- return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
+ return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT);
}];
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index ee324a5e93f0f..590731d508ce3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -261,7 +261,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
// nodes(max/min) have same behavior when one input is NaN and other isn't.
// Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
// also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
- if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
+ if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI, VT)) {
// Don't fold single use constant that can't be inlined.
if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
(!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
@@ -291,8 +291,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
// For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
// to 0.0 requires dx10_clamp = true.
if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
- isKnownNeverSNaN(Val, MRI)) ||
- isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
+ isKnownNeverSNaN(Val, MRI, VT)) ||
+ isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT)) {
Reg = Val;
return true;
}
@@ -329,6 +329,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
Register Val = Src0->getOperand(0).getReg();
auto isOp3Zero = [&]() {
+ if (MI.getNumOperands() < 5)
+ return false;
MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
@@ -338,9 +340,9 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
// no NaN inputs. Most often MI is marked with nnan fast math flag.
// For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
// when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
- if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
+ if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT) ||
(getIEEE() && getDX10Clamp() &&
- (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
+ (isKnownNeverSNaN(Val, MRI, VT) || isOp3Zero()))) {
Reg = Val;
return true;
}
>From d30141b420967ba09fb865b66a78dfd9bf320684 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Fri, 23 May 2025 16:35:57 +0000
Subject: [PATCH 3/7] update tests
---
.../GlobalISel/clamp-minmax-const-combine.ll | 23 ++++--------
.../GlobalISel/fmed3-min-max-const-combine.ll | 22 +++++-------
.../regbankcombiner-clamp-minmax-const.mir | 35 +++++--------------
.../regbankcombiner-fmed3-minmax-const.mir | 15 ++++----
4 files changed, 28 insertions(+), 67 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index ab0de89d3e4e3..3c57fe3eb5176 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -305,9 +305,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -330,8 +328,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
@@ -355,9 +352,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -367,9 +362,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
@@ -381,9 +374,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
+; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -393,9 +384,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index cf0547e112a6b..df8573048d252 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -456,15 +456,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
@@ -489,15 +487,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
@@ -509,7 +505,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
@@ -522,16 +518,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
@@ -543,7 +537,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
index 70fd67363648d..c3eed50092056 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir
@@ -441,13 +441,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%9:vgpr(s32) = COPY %2(s32)
@@ -481,13 +476,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%10:vgpr(s32) = COPY %2(s32)
@@ -522,14 +513,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%10:vgpr(s32) = COPY %2(s32)
@@ -564,13 +550,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%9:vgpr(s32) = COPY %2(s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
index 2f41d86100040..a53e97af0d028 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
@@ -469,11 +469,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%7:vgpr(s32) = COPY %2(s32)
@@ -502,11 +501,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
%7:vgpr(s32) = COPY %2(s32)
@@ -536,11 +534,10 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
%7:vgpr(s32) = G_FCANONICALIZE %0
>From c4e9c3cb0d68db884f0406b656a94f1017c03a23 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Sun, 25 May 2025 20:16:25 +0000
Subject: [PATCH 4/7] handle snan
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 55 +++++++++++++++++--
1 file changed, 49 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index f1e77d813f0df..7d3af5c05e5c2 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -886,6 +886,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FMA:
case TargetOpcode::G_STRICT_FMA:
case TargetOpcode::G_FMAD: {
+ Known.knownNot(fcSNan);
+
if ((InterestedClasses & fcNegative) == fcNone)
break;
@@ -951,19 +953,21 @@ void GISelValueTracking::computeKnownFPClass(Register R,
KnownFPClass KnownSrc;
computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
- Depth + 1);
+ Depth + 1);
+
+ Known.knownNot(fcSNan);
Known.knownNot(fcInf);
if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
Known.knownNot(fcNan);
break;
}
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXIMUM:
case TargetOpcode::G_FMINIMUM:
- case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FMINIMUMNUM: {
Register LHS = MI.getOperand(1).getReg();
@@ -976,8 +980,13 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Depth + 1);
bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
+ bool NeverSNaN = KnownLHS.isKnownNever(fcSNan) || KnownRHS.isKnownNever(fcSNan);
Known = KnownLHS | KnownRHS;
+ if (Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
+ Opcode == TargetOpcode::G_FMINNUM_IEEE)
+ Known.knownNot(fcSNan);
+
// If either operand is not NaN, the result is not NaN.
if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
Opcode == TargetOpcode::G_FMAXNUM ||
@@ -985,6 +994,18 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Opcode == TargetOpcode::G_FMAXIMUMNUM))
Known.knownNot(fcNan);
+ if (NeverSNaN && (Opcode == TargetOpcode::G_FMINNUM ||
+ Opcode == TargetOpcode::G_FMAXNUM ||
+ Opcode == TargetOpcode::G_FMINIMUMNUM ||
+ Opcode == TargetOpcode::G_FMAXIMUMNUM))
+ Known.knownNot(fcSNan);
+
+ if ((Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
+ Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
+ ((KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNever(fcSNan)) ||
+ (KnownLHS.isKnownNever(fcSNan) && KnownRHS.isKnownNeverNaN())))
+ Known.knownNot(fcNan);
+
if (Opcode == TargetOpcode::G_FMAXNUM ||
Opcode == TargetOpcode::G_FMAXIMUMNUM ||
Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
@@ -1072,9 +1093,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FCANONICALIZE: {
Register Val = MI.getOperand(1).getReg();
KnownFPClass KnownSrc;
- computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
- Depth + 1);
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
// This is essentially a stronger form of
// propagateCanonicalizingSrc. Other "canonicalizing" operations don't
// actually have an IR canonicalization guarantee.
@@ -1283,6 +1305,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
KnownFPClass KnownLHS, KnownRHS;
+
+ Known.knownNot(fcSNan);
+
bool WantNegative =
(Opcode == TargetOpcode::G_FADD ||
Opcode == TargetOpcode::G_STRICT_FADD) &&
@@ -1347,6 +1372,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_STRICT_FMUL: {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
+
+ Known.knownNot(fcSNan);
+
// X * X is always non-negative or a NaN.
if (LHS == RHS)
Known.knownNot(fcNegative);
@@ -1393,6 +1421,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FREM: {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
+ Known.knownNot(fcSNan);
if (LHS == RHS) {
// TODO: Could filter out snan if we inspect the operand
@@ -1477,6 +1506,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register Src = MI.getOperand(1).getReg();
// Infinity, nan and zero propagate from source.
computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1);
+ Known.knownNot(fcSNan);
LLT DstTy = MRI.getType(Dst).getScalarType();
const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
@@ -1499,7 +1529,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
}
case TargetOpcode::G_FPTRUNC: {
computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
- Depth);
+ Depth);
+ Known.knownNot(fcSNan);
break;
}
case TargetOpcode::G_SITOFP:
@@ -1677,6 +1708,18 @@ void GISelValueTracking::computeKnownFPClass(Register R,
computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
break;
}
+ case TargetOpcode::G_FTAN:
+ case TargetOpcode::G_FACOS:
+ case TargetOpcode::G_FASIN:
+ case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
+ case TargetOpcode::G_FCOSH:
+ case TargetOpcode::G_FSINH:
+ case TargetOpcode::G_FTANH: {
+ Known.knownNot(fcSNan);
+ // TODO: ...
+ break;
+ }
}
}
>From 5d0919a23433e2cd1af1679af6864a12526263fb Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Mon, 26 May 2025 12:14:42 +0000
Subject: [PATCH 5/7] clang format
---
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 7 +--
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 54 ++++++++++---------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 8 +--
3 files changed, 36 insertions(+), 33 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 503f61216d9e6..cd27a3d5cdbac 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -336,11 +336,12 @@ bool isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI,
/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
/// this returns if \p Val can be assumed to never be a signaling NaN.
-bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *ValueTracking,
- bool SNaN = false);
+bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ GISelValueTracking *ValueTracking, bool SNaN = false);
/// Returns true if \p Val can be assumed to never be a signaling NaN.
-inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *ValueTracking) {
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI,
+ GISelValueTracking *ValueTracking) {
return isKnownNeverNaN(Val, MRI, ValueTracking, true);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 7d3af5c05e5c2..efaca3d5dd9f7 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -695,7 +695,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
unsigned Depth) {
if (!R.isVirtual())
return;
-
+
LLT Ty = MRI.getType(R);
APInt DemandedElts =
Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
@@ -953,8 +953,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
KnownFPClass KnownSrc;
computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
- Depth + 1);
-
+ Depth + 1);
+
Known.knownNot(fcSNan);
Known.knownNot(fcInf);
@@ -980,7 +980,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Depth + 1);
bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
- bool NeverSNaN = KnownLHS.isKnownNever(fcSNan) || KnownRHS.isKnownNever(fcSNan);
+ bool NeverSNaN =
+ KnownLHS.isKnownNever(fcSNan) || KnownRHS.isKnownNever(fcSNan);
Known = KnownLHS | KnownRHS;
if (Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
@@ -1051,7 +1052,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
//
if ((Known.KnownFPClasses & fcZero) != fcNone &&
!Known.isKnownNeverSubnormal()) {
- DenormalMode Mode = MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
+ DenormalMode Mode =
+ MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
if (Mode != DenormalMode::getIEEE())
Known.KnownFPClasses |= fcZero;
}
@@ -1095,8 +1097,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
KnownFPClass KnownSrc;
computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
- Depth + 1);
-
+ Depth + 1);
+
// This is essentially a stronger form of
// propagateCanonicalizingSrc. Other "canonicalizing" operations don't
// actually have an IR canonicalization guarantee.
@@ -1347,19 +1349,19 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
// (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
- if ((KnownLHS.isKnownNeverLogicalNegZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))) ||
- KnownRHS.isKnownNeverLogicalNegZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
+ if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType()))) ||
+ KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType())))) &&
// Make sure output negative denormal can't flush to -0
outputDenormalIsIEEEOrPosZero(*MF, DstTy))
Known.knownNot(fcNegZero);
} else {
// Only fsub -0, +0 can return -0
- if ((KnownLHS.isKnownNeverLogicalNegZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))) ||
- KnownRHS.isKnownNeverLogicalPosZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
+ if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType()))) ||
+ KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType())))) &&
// Make sure output negative denormal can't flush to -0
outputDenormalIsIEEEOrPosZero(*MF, DstTy))
Known.knownNot(fcNegZero);
@@ -1372,9 +1374,9 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_STRICT_FMUL: {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
-
+
Known.knownNot(fcSNan);
-
+
// X * X is always non-negative or a NaN.
if (LHS == RHS)
Known.knownNot(fcNegative);
@@ -1408,8 +1410,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
}
if ((KnownRHS.isKnownNeverInfinity() ||
- KnownLHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) &&
+ KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType())))) &&
(KnownLHS.isKnownNeverInfinity() ||
KnownRHS.isKnownNeverLogicalZero(
MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
@@ -1465,10 +1467,10 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
(KnownLHS.isKnownNeverInfinity() ||
KnownRHS.isKnownNeverInfinity()) &&
- ((KnownLHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) ||
- (KnownRHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))) {
+ ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType())))) ||
+ (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType())))))) {
Known.knownNot(fcNan);
}
@@ -1481,8 +1483,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
// Inf REM x and x REM 0 produce NaN.
if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
KnownLHS.isKnownNeverInfinity() &&
- KnownRHS.isKnownNeverLogicalZero(
- MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))) {
+ KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
+ getFltSemanticForLLT(DstTy.getScalarType())))) {
Known.knownNot(fcNan);
}
@@ -1529,7 +1531,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
}
case TargetOpcode::G_FPTRUNC: {
computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
- Depth);
+ Depth);
Known.knownNot(fcSNan);
break;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 98594edb70f71..cf8ded67ac679 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -807,8 +807,8 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
return FoldedElements;
}
-bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelValueTracking *VT,
- bool SNaN) {
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ GISelValueTracking *VT, bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
if (!DefMI)
return false;
@@ -816,11 +816,11 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, GISelVa
const TargetMachine& TM = DefMI->getMF()->getTarget();
if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
-
+
KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan);
if (SNaN)
return FPClass.isKnownNever(fcSNan);
-
+
return FPClass.isKnownNeverNaN();
}
>From 25f6d5b31feb9c87dfc2d4f37dffbdcbdd407bf8 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Wed, 28 May 2025 14:50:56 +0200
Subject: [PATCH 6/7] fix wrong SNaN handling
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 147 ++++++++++++++----
1 file changed, 120 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index efaca3d5dd9f7..e98c49bbcc9dd 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -886,8 +886,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FMA:
case TargetOpcode::G_STRICT_FMA:
case TargetOpcode::G_FMAD: {
- Known.knownNot(fcSNan);
-
if ((InterestedClasses & fcNegative) == fcNone)
break;
@@ -923,8 +921,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownSrc.isKnownNeverPosInfinity())
Known.knownNot(fcPosInf);
- if (KnownSrc.isKnownNever(fcSNan))
- Known.knownNot(fcSNan);
// Any negative value besides -0 returns a nan.
if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
@@ -947,6 +943,7 @@ void GISelValueTracking::computeKnownFPClass(Register R,
}
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FSINCOS: {
// Return NaN on infinite inputs.
Register Val = MI.getOperand(1).getReg();
@@ -955,7 +952,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
Depth + 1);
- Known.knownNot(fcSNan);
Known.knownNot(fcInf);
if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
@@ -980,8 +976,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Depth + 1);
bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
- bool NeverSNaN =
- KnownLHS.isKnownNever(fcSNan) || KnownRHS.isKnownNever(fcSNan);
Known = KnownLHS | KnownRHS;
if (Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
@@ -995,12 +989,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Opcode == TargetOpcode::G_FMAXIMUMNUM))
Known.knownNot(fcNan);
- if (NeverSNaN && (Opcode == TargetOpcode::G_FMINNUM ||
- Opcode == TargetOpcode::G_FMAXNUM ||
- Opcode == TargetOpcode::G_FMINIMUMNUM ||
- Opcode == TargetOpcode::G_FMAXIMUMNUM))
- Known.knownNot(fcSNan);
-
if ((Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
((KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNever(fcSNan)) ||
@@ -1197,6 +1185,8 @@ void GISelValueTracking::computeKnownFPClass(Register R,
if (KnownSrc.isKnownNeverNaN()) {
Known.knownNot(fcNan);
Known.signBitMustBeZero();
+ } else {
+ Known.knownNot(fcSNan);
}
break;
@@ -1308,8 +1298,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register RHS = MI.getOperand(2).getReg();
KnownFPClass KnownLHS, KnownRHS;
- Known.knownNot(fcSNan);
-
bool WantNegative =
(Opcode == TargetOpcode::G_FADD ||
Opcode == TargetOpcode::G_STRICT_FADD) &&
@@ -1375,8 +1363,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- Known.knownNot(fcSNan);
-
// X * X is always non-negative or a NaN.
if (LHS == RHS)
Known.knownNot(fcNegative);
@@ -1423,7 +1409,6 @@ void GISelValueTracking::computeKnownFPClass(Register R,
case TargetOpcode::G_FREM: {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- Known.knownNot(fcSNan);
if (LHS == RHS) {
// TODO: Could filter out snan if we inspect the operand
@@ -1710,16 +1695,124 @@ void GISelValueTracking::computeKnownFPClass(Register R,
computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
break;
}
- case TargetOpcode::G_FTAN:
- case TargetOpcode::G_FACOS:
- case TargetOpcode::G_FASIN:
- case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
- case TargetOpcode::G_FCOSH:
- case TargetOpcode::G_FSINH:
+ case TargetOpcode::G_FATAN: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcNan;
+
+ break;
+ }
+ case TargetOpcode::G_FATAN2: {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownFPClass KnownLHS;
+ KnownFPClass KnownRHS;
+
+ computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
+ Depth + 1);
+
+ computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
+ Depth + 1);
+
+ if (!KnownRHS.isKnownNeverNaN() || !KnownRHS.isKnownNeverNaN())
+ break;
+
+ if (KnownLHS.isKnownAlways(fcZero)) {
+ // atan2(+-0, -0) -> +-pi
+ // atan2(+-0, x) -> +-pi for x < 0
+ if (KnownRHS.isKnownAlways(fcNegFinite)) {
+ Known.KnownFPClasses = fcFinite;
+ break;
+ }
+
+ // atan2(+-0, +0) -> +-0
+ // atan2(+-0, x) -> +-0 for x > 0
+ if (KnownRHS.isKnownAlways(fcPosFinite)) {
+ Known.KnownFPClasses = fcZero;
+ break;
+ }
+ }
+
+ if (KnownRHS.isKnownAlways(fcZero)) {
+ // atan2(y, +-0) -> -pi/2 for y < 0
+ if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcNegFinite)) {
+ Known.KnownFPClasses = fcNegFinite;
+ break;
+ }
+
+ // atan2(y, +-0) -> +pi/2 for y > 0
+ if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcPosFinite)) {
+ Known.KnownFPClasses = fcPosFinite;
+ break;
+ }
+ }
+
+ if (KnownLHS.isKnownAlways(fcPosFinite) && KnownLHS.isKnownNeverZero()) {
+ // atan2(+-y, -inf) -> +-pi for finite y > 0
+ if (KnownRHS.isKnownAlways(fcNegInf)) {
+ Known.KnownFPClasses = fcFinite;
+ break;
+ }
+
+ // atan2(+-y, +inf) -> +-0 for finite y > 0
+ if (KnownRHS.isKnownAlways(fcPosInf)) {
+ Known.KnownFPClasses = fcZero;
+ break;
+ }
+ }
+
+ if (KnownLHS.isKnownAlways(fcInf)) {
+ // atan2(+-inf, x) -> +-pi/2 for finite x
+ // atan2(+-inf, -inf) -> +-3pi/4
+ // atan2(+-inf, +inf) -> +-pi/4
+ Known.KnownFPClasses = fcFinite;
+ break;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_FCOSH: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+ // cosh(+-inf) -> +inf
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcPosInf;
+
+ break;
+ }
+ case TargetOpcode::G_FSINH: {
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+ // sinh(+-inf) is +-inf
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcInf;
+
+ break;
+ }
case TargetOpcode::G_FTANH: {
- Known.knownNot(fcSNan);
- // TODO: ...
+ Register Val = MI.getOperand(1).getReg();
+ KnownFPClass KnownSrc;
+
+ computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
+ Depth + 1);
+
+ // tanh(+-inf) is +-1
+ if (KnownSrc.isKnownAlways(fcInf))
+ Known.KnownFPClasses = fcFinite;
+
break;
}
}
>From b0758969f1a9121df39c24c4d2fc8aa02793053e Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tim at gymni.ch>
Date: Wed, 28 May 2025 13:43:54 +0000
Subject: [PATCH 7/7] update tests
---
.../GlobalISel/clamp-fmed3-const-combine.ll | 18 ++++++---
.../GlobalISel/clamp-minmax-const-combine.ll | 9 ++++-
.../regbankcombiner-clamp-fmed3-const.mir | 40 ++++++++++++++-----
3 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
index ef88a2be47872..fb33d3dbfc336 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
@@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -134,7 +137,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
@@ -172,7 +177,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
@@ -182,7 +188,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul float %a, 2.0
%fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 3c57fe3eb5176..2955fb1f24f8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -95,7 +95,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_max_f16 v0, v0, 0
+; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
@@ -105,7 +107,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0
+; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
%maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half poison>, <2 x half> %fmul)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
index a97d905f2a978..129cbcfca6fa5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir
@@ -162,8 +162,12 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
;
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
; GFX12: liveins: $vgpr0
@@ -172,8 +176,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
@@ -222,8 +230,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
@@ -307,8 +319,12 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
;
; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
; GFX12: liveins: $vgpr0
@@ -317,8 +333,12 @@ body: |
; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
- ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
- ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
+ ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
+ ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
%0:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
%8:vgpr(s32) = COPY %2(s32)
More information about the llvm-commits
mailing list