[llvm] 70fef78 - AMDGPU: MC support for v_cvt_scalef32_pk_f32_[fp|bf]8 of gfx950. (#117383)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 25 09:56:25 PST 2024


Author: Matt Arsenault
Date: 2024-11-25T09:56:22-08:00
New Revision: 70fef78329eb4da338ef8345b5059f9a57ff21a5

URL: https://github.com/llvm/llvm-project/commit/70fef78329eb4da338ef8345b5059f9a57ff21a5
DIFF: https://github.com/llvm/llvm-project/commit/70fef78329eb4da338ef8345b5059f9a57ff21a5.diff

LOG: AMDGPU: MC support for v_cvt_scalef32_pk_f32_[fp|bf]8 of gfx950. (#117383)

OPSEL[0] selects srcword to read.

Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/VOP3Instructions.td
    llvm/test/MC/AMDGPU/gfx950_asm_features.s
    llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index e779f06394c01f..b487fe9e46df0b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -899,16 +899,31 @@ def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32,
   let HasOMod = 0;
 }
 
+def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
+                                              VOP3_OPSEL> {
+  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+                          op_sel0:$op_sel);
+  let HasClamp = 0;
+  let HasSrc2 = 0;
+  let HasSrc2Mods = 0;
+  let HasExtVOP3DPP = 0;
+  let HasOpSel = 1;
+  let HasOMod = 0;
+}
+
 let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
   defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
   defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
   defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
+  defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
 }
 
 let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
   defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
   defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
   defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
+  defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
 }
 
 let SubtargetPredicate = isGFX10Plus in {
@@ -1837,9 +1852,11 @@ let OtherPredicates = [HasFP8ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
 defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
 defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
+defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
 }
 let OtherPredicates = [HasBF8ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
 defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
 defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
+defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
 }

diff  --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 3216c81f321864..51ddffb84e64ee 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -549,3 +549,51 @@ v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
 v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index a6c35d192bb035..fe87f7e2d28d29 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -359,3 +359,39 @@
 
 # GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
 0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00


        


More information about the llvm-commits mailing list