[llvm-branch-commits] [llvm] AMDGPU: Add subtarget features for minimum3/maximum3 instructions (PR #116308)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Nov 14 17:43:52 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116308
>From 1eebc858ad7c42b9ef42adfac1a93aa79d7a80f0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 22 May 2024 19:23:24 +0200
Subject: [PATCH] AMDGPU: Add subtarget features for minimum3/maximum3
instructions
gfx12 and gfx950 managed to produce 3 different permutations of this feature.
gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 22 ++++++++++++++++++++++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 11 ++++++++++-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 ++--
3 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d028c1f5ca7613..35dbf86b7c6f36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -137,6 +137,18 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;
+def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
+ "HasMinimum3Maximum3F32",
+ "true",
+ "Has v_minimum3_f32 and v_maximum3_f32 instructions"
+>;
+
+def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
+ "HasMinimum3Maximum3F16",
+ "true",
+ "Has v_minimum3_f16 and v_maximum3_f16 instructions"
+>;
+
def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
"SupportsXNACK",
"true",
@@ -1263,6 +1275,7 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
+ FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]
>;
@@ -2005,6 +2018,15 @@ def isGFX12Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
+def HasMinimum3Maximum3F32 :
+ Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
+ AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;
+
+def HasMinimum3Maximum3F16 :
+ Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
+ AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
+
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 1b06756a8a1016..2e7a06a15bd52a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -242,7 +242,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasForceStoreSC0SC1 = false;
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;
-
+ bool HasMinimum3Maximum3F32 = false;
+ bool HasMinimum3Maximum3F16 = false;
bool RequiresCOV6 = false;
// Dummy feature to use for assembler in tablegen.
@@ -1307,6 +1308,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the target has instructions with xf32 format support.
bool hasXF32Insts() const { return HasXF32Insts; }
+ bool hasMinimum3Maximum3F32() const {
+ return HasMinimum3Maximum3F32;
+ }
+
+ bool hasMinimum3Maximum3F16() const {
+ return HasMinimum3Maximum3F16;
+ }
+
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 34ecdb56e8689d..551e8b3a679202 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -226,7 +226,7 @@ let mayRaiseFPException = 0 in {
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
} // End mayRaiseFPException = 0
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
@@ -625,7 +625,7 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
More information about the llvm-branch-commits
mailing list