[llvm] [AMDGPU][NFCI] Add IEEEMinimumMaximumInsts SubtargetFeature (PR #141081)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 10:16:50 PDT 2025
https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/141081
>From 5ec194083f6216abf4e5290a47d96b964d07524e Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 22 May 2025 16:44:04 +0200
Subject: [PATCH 1/3] [AMDGPU][NFC] Rename IEEEMinMax to IEEEMinMaxInsts
Also remove unused hasIEEEMinMax3 which is replaced with
hasMinimum3Maximum3F32 and hasMinimum3Maximum3F16
---
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5 +----
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +++--
3 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f82e6df9bcbfc..27d531eaac6d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2095,7 +2095,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_SADDO, G_SSUBO})
.lower();
- if (ST.hasIEEEMinMax()) {
+ if (ST.hasIEEEMinMaxInsts()) {
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 202e5b38f0a48..08bce273d1ee7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1447,10 +1447,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasIEEEMode() const { return getGeneration() < GFX12; }
// \returns true if the target has IEEE fminimum/fmaximum instructions
- bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
-
- // \returns true if the target has IEEE fminimum3/fmaximum3 instructions
- bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
+ bool hasIEEEMinMaxInsts() const { return getGeneration() >= GFX12; }
// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
bool hasRrWGMode() const { return getGeneration() >= GFX12; }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 586de433ea28a..a6bd2c1ac9859 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -877,7 +877,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- if (Subtarget->hasIEEEMinMax()) {
+ if (Subtarget->hasIEEEMinMaxInsts()) {
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
} else {
@@ -7023,7 +7023,8 @@ SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
if (VT.isVector())
return splitBinaryVectorOp(Op, DAG);
- assert(!Subtarget->hasIEEEMinMax() && !Subtarget->hasMinimum3Maximum3F16() &&
+ assert(!Subtarget->hasIEEEMinMaxInsts() &&
+ !Subtarget->hasMinimum3Maximum3F16() &&
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
"should not need to widen f16 minimum/maximum to v2f16");
>From 068a19b92405d2d81873d2f03a797a45c6726748 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 26 May 2025 18:40:25 +0200
Subject: [PATCH 2/3] Rename to IEEEMinimumMaximumInsts, turn into a proper
subtarget feature
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 14 ++++++++++++--
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 ++-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++--
llvm/lib/Target/AMDGPU/SIInstructions.td | 6 +++---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 6 +++---
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 ++--
7 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a9b39eebbcdcf..e9d859026c35f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -143,6 +143,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;
+def FeatureIEEEMinimumMaximumInsts : SubtargetFeature<"ieee-minimum-maximum-insts",
+ "HasIEEEMinimumMaximumInsts",
+ "true",
+ "Has v_minimum/maximum_f16/f32/f64 and v_pk_minimum/maximum_f16 instructions"
+>;
+
def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
"HasMinimum3Maximum3F32",
"true",
@@ -1433,8 +1439,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
- FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+ FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
+ FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]
>;
@@ -2188,6 +2194,10 @@ def isGFX12Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
+def HasIEEEMinimumMaximumInsts :
+ Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
+ AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;
+
def HasMinimum3Maximum3F32 :
Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 27d531eaac6d1..aa678df675fb6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2095,7 +2095,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_SADDO, G_SSUBO})
.lower();
- if (ST.hasIEEEMinMaxInsts()) {
+ if (ST.hasIEEEMinimumMaximumInsts()) {
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 08bce273d1ee7..6611cd2f5a882 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -255,6 +255,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;
bool HasAshrPkInsts = false;
+ bool HasIEEEMinimumMaximumInsts = false;
bool HasMinimum3Maximum3F32 = false;
bool HasMinimum3Maximum3F16 = false;
bool HasMinimum3Maximum3PKF16 = false;
@@ -1447,7 +1448,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasIEEEMode() const { return getGeneration() < GFX12; }
// \returns true if the target has IEEE fminimum/fmaximum instructions
- bool hasIEEEMinMaxInsts() const { return getGeneration() >= GFX12; }
+ bool hasIEEEMinimumMaximumInsts() const { return HasIEEEMinimumMaximumInsts; }
// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
bool hasRrWGMode() const { return getGeneration() >= GFX12; }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a6bd2c1ac9859..936962ceb3cf0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -877,7 +877,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- if (Subtarget->hasIEEEMinMaxInsts()) {
+ if (Subtarget->hasIEEEMinimumMaximumInsts()) {
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
} else {
@@ -7023,7 +7023,7 @@ SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
if (VT.isVector())
return splitBinaryVectorOp(Op, DAG);
- assert(!Subtarget->hasIEEEMinMaxInsts() &&
+ assert(!Subtarget->hasIEEEMinimumMaximumInsts() &&
!Subtarget->hasMinimum3Maximum3F16() &&
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
"should not need to widen f16 minimum/maximum to v2f16");
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 897c30948cf06..dbb65dc3b67d1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3924,21 +3924,21 @@ let True16Predicate = UseFakeTrue16Insts in {
}
} // End SubtargetPredicate = [isGFX9Plus]
-let SubtargetPredicate = isGFX12Plus in {
+let SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
}
-let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = isGFX12Plus in {
+let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
}
-let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = isGFX12Plus in {
+let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = HasIEEEMinimumMaximumInsts in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index f372101cb7b77..1deba8694e4f4 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -167,7 +167,7 @@ defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul]
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
+let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
@@ -177,7 +177,7 @@ let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
} // End SchedRW = [WriteDoubleAdd]
-} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1
+} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0, AddedComplexity = 1
} // End isReMaterializable = 1
@@ -1501,7 +1501,7 @@ let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
def : Cvt_Scale_Sr_F32ToBF16F16_Pat<int_amdgcn_cvt_sr_f16_f32, V_CVT_SR_F16_F32_e64, v2f16>;
}
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index e8db879ca5077..31997f803dfc6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -115,10 +115,10 @@ defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I1
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fmaximum>;
defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fminimum>;
-} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
+} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0
}
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
>From 2b6efa56a202f0e6a359c9111745c2d3b531d2f5 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 17 Jun 2025 19:03:16 +0200
Subject: [PATCH 3/3] Rebase, update, fix
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 2 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index e9d859026c35f..85aeebc88ffbd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -146,7 +146,7 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
def FeatureIEEEMinimumMaximumInsts : SubtargetFeature<"ieee-minimum-maximum-insts",
"HasIEEEMinimumMaximumInsts",
"true",
- "Has v_minimum/maximum_f16/f32/f64 and v_pk_minimum/maximum_f16 instructions"
+ "Has v_minimum/maximum_f16/f32/f64, v_minimummaximum/maximumminimum_f16/f32 and v_pk_minimum/maximum_f16 instructions"
>;
def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 936962ceb3cf0..45d98b88f1bf3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13908,7 +13908,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// operand form.
const SDNodeFlags Flags = N->getFlags();
if ((Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM) &&
- !Subtarget->hasIEEEMinMax() && Flags.hasNoNaNs()) {
+ !Subtarget->hasIEEEMinimumMaximumInsts() && Flags.hasNoNaNs()) {
unsigned NewOpc =
Opc == ISD::FMINIMUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
return DAG.getNode(NewOpc, SDLoc(N), VT, Op0, Op1, Flags);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 1deba8694e4f4..d5a8a5197a561 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1506,7 +1506,7 @@ let SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0 in {
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
-} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
+} // End SubtargetPredicate = HasIEEEMinimumMaximumInsts, ReadsModeReg = 0
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
defm V_DOT2_F16_F16 : VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
More information about the llvm-commits
mailing list