[llvm-branch-commits] [llvm] [AMDGPU] Introduce a "new" target feature `xf32-insts` (PR #115214)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Nov 7 09:17:08 PST 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/115214
>From 1bf7b52751fd5f09b9e9fb4850f5ca6a022cda81 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 6 Nov 2024 16:15:50 -0500
Subject: [PATCH] [AMDGPU] Introduce a "new" target feature `xf32-insts`
The feature itself is not new; this change only adds a dedicated subtarget
feature so that it can be used to guard the corresponding instructions.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 11 +++++++++++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++++
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8 ++++++--
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 910f5e06a6f3c4..bde61a1f7e58df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1110,6 +1110,13 @@ def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
"Target Requires Code Object V6"
>;
+def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
+ "HasXF32Insts",
+ "true",
+ "Has instructions that support xf32 format, such as "
+ "v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
+ >;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1448,6 +1455,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeaturePkFmacF16Inst,
+ FeatureXF32Insts,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureAtomicBufferGlobalPkAddF16Insts,
@@ -2289,6 +2297,9 @@ def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
+def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">,
+ AssemblerPredicate<(all_of FeatureXF32Insts)>;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 1ea3beb2855d69..6ff964077d8fd0 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -179,6 +179,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
+ bool HasXF32Insts = false;
/// The maximum number of instructions that may be placed within an S_CLAUSE,
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
/// indicates a lack of S_CLAUSE support.
@@ -1302,6 +1303,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() == GFX12;
}
+ /// \returns true if the target has instructions with xf32 format support.
+ bool hasXF32Insts() const { return HasXF32Insts; }
+
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index cdaf489792a24d..e246d433401f94 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -757,10 +757,12 @@ let Predicates = [isGFX90APlus] in {
let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
+} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
+
+let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in {
defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
-
-} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1
let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
@@ -1764,8 +1766,10 @@ defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx90a <0x6f>;
defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
+let SubtargetPredicate = HasXF32Insts in {
defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
+} // End SubtargetPredicate = HasXF32Insts
let SubtargetPredicate = HasFP8Insts in {
defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
More information about the llvm-branch-commits mailing list