[llvm] AMDGPU: Use HasFP8ConversionInsts appropriately, NFC (PR #82024)
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 16 10:38:31 PST 2024
https://github.com/changpeng created https://github.com/llvm/llvm-project/pull/82024
The corresponding fp8 conversion instructions are available for a sub-target if and only if that sub-target has the "HasFP8ConversionInsts" feature. We should not assume that all future sub-targets (gfx12+) have this feature.
From fe478c263163c3769019c294c4ce03f463e5be33 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Fri, 16 Feb 2024 09:55:02 -0800
Subject: [PATCH] AMDGPU: Use HasFP8ConversionInsts appropriately, NFC
The corresponding fp8 conversion instructions are available for a
sub-target when and only when the subtarget "HasFP8ConversionInsts".
We should not assume all the future sub-targets (gfx12+) have this
feature.
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 9 +++++----
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 ++++
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5461c645e608fe..80071f919fc88b 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -635,8 +635,8 @@ def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]>
let Src1VOP3DPP = Src1RC64;
}
-let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
- SchedRW = [WriteFloatCvt] in {
+let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
+ mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
@@ -653,7 +653,7 @@ class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
(inst_e32 $src))
>;
-let SubtargetPredicate = isGFX12Plus in {
+let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
foreach Index = [0, 1, 2, 3] in {
def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
@@ -898,7 +898,7 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
-
+let OtherPredicates = [HasFP8ConversionInsts] in {
// Define VOP1 instructions using the pseudo instruction with its old profile and
// VOP3 using the OpSel profile for the pseudo instruction.
defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">;
@@ -912,6 +912,7 @@ defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8
defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;
+}
defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 8d965d3b9041d5..e8766174a40c9a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -667,6 +667,7 @@ class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst>
!if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0)
>;
+let SubtargetPredicate = HasFP8ConversionInsts in {
foreach Index = [0, -1] in {
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
@@ -676,6 +677,7 @@ foreach Index = [0, 1, 2, 3] in {
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
}
+} // End SubtargetPredicate = HasFP8ConversionInsts.
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
@@ -1038,10 +1040,12 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+let SubtargetPredicate = HasFP8ConversionInsts in {
defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_gfx12<0x369>;
defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36a>;
defm V_CVT_SR_FP8_F32 : VOP3Only_Realtriple_gfx12<0x36b>;
defm V_CVT_SR_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36c>;
+}
//===----------------------------------------------------------------------===//
// GFX11, GFX12
More information about the llvm-commits mailing list