[llvm] AMDGPU: MC support for v_cvt_scalef32_pk_{fp8|bf8}_f32 of gfx950. (PR #117382)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 09:50:28 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117382
From 5ec267bdeb3d83197b0200fda79be4c0a156dce1 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Sat, 6 Apr 2024 09:17:03 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_pk_{fp8|bf8}_f32 of gfx950.
OPSEL[3] selects low/high 16 bits of dest write.
Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 16 +++++++
llvm/test/MC/AMDGPU/gfx950_asm_features.s | 48 +++++++++++++++++++
.../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 36 ++++++++++++++
3 files changed, 100 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 89c6a3f09d78c2..e779f06394c01f 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -887,14 +887,28 @@ class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPPro
let HasOMod = 0;
}
+def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>,
+ VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+ FP32InputMods:$src2_modifiers, Src2RC64:$src2,
+ op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasExtVOP3DPP = 0;
+ let HasOpSel = 1;
+ let HasOMod = 0;
+}
+
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
+ defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
}
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
+ defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
}
let SubtargetPredicate = isGFX10Plus in {
@@ -1822,8 +1836,10 @@ defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
let OtherPredicates = [HasFP8ConversionScaleInsts] in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
+defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
}
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
+defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index ea60735deb888b..3216c81f321864 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -501,3 +501,51 @@ v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1]
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00]
v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index 395910812f18e5..a6c35d192bb035 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -323,3 +323,39 @@
# GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00]
0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02
More information about the llvm-commits mailing list