[clang] [clang-tools-extra] [compiler-rt] [llvm] [flang] [libc] [lld] [lldb] [libcxx] [AMDGPU][GFX12] VOP encoding and codegen - add support for v_cvt fp8/… (PR #78414)
Mirko Brkušanin via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 22 05:34:43 PST 2024
================
@@ -626,11 +629,82 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-foreach Index = [0, -1] in {
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
- V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
- V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+let SubtargetPredicate = isGFX9Only in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+ }
+}
+
+
+// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
+def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
+ let InsVOP3OpSel = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
+ clampmod:$clamp, omod:$omod, op_sel0:$op_sel);
+
+ let HasOpSel = 1;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, i32, untyped]> {
+ let InsVOP3OpSel = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
+ Src1Mod:$src1_modifiers, Src1RC64:$src1,
+ clampmod:$clamp, omod:$omod, op_sel0:$op_sel);
+ let AsmVOP3OpSel = !subst(", $src1_modifiers", "", getAsmVOP3OpSel<2, 0, 0, 1, 1, 0>.ret);
+
+ let HasOpSel = 1;
+ let HasExtDPP = 1;
+ let HasExtVOP3DPP = 1;
+
+ let Src1VOP3DPP = Src1RC64;
+ let AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3OpSel>.ret;
+ let AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3OpSel>.ret;
+}
+
+let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+}
+
+class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 !if(index{0}, SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), $src,
+ !if(index{1}, SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1), (i32 0),
----------------
mbrkusanin wrote:
Looks like SRCMODS.OP_SEL_1 does nothing here. These should be 0.
Tests are in llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll:
test_cvt_f32_fp8_byte0
test_cvt_f32_fp8_byte1
test_cvt_f32_fp8_byte2
test_cvt_f32_fp8_byte3
https://github.com/llvm/llvm-project/pull/78414
More information about the cfe-commits
mailing list