[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk_{bf|f}16_{bf|fp}8 of gfx950. (PR #117593)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Nov 25 13:55:57 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117593

>From af979999871bc55ca15ab6eb0e60df53987132b4 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Mon, 8 Apr 2024 07:44:32 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_pk_{bf|f}16_{bf|fp}8 of
 gfx950.

OPSEL[0] selects src_word to read.

Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>
---
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  8 ++
 llvm/test/MC/AMDGPU/gfx950_asm_features.s     | 96 +++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx950_err.s              | 50 +++++++++-
 .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt  | 72 ++++++++++++++
 4 files changed, 225 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 764a2275205665..fdffb2c36dcccf 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -945,6 +945,8 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
   defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
   defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
   defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
+  defm V_CVT_SCALEF32_PK_F16_FP8    : VOP3Inst<"v_cvt_scalef32_pk_f16_fp8",  VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
+  defm V_CVT_SCALEF32_PK_BF16_FP8   : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
 }
 
 let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -954,6 +956,8 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
   defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
   defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
   defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
+  defm V_CVT_SCALEF32_PK_F16_BF8    : VOP3Inst<"v_cvt_scalef32_pk_f16_bf8",  VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
+  defm V_CVT_SCALEF32_PK_BF16_BF8   : VOP3Inst<"v_cvt_scalef32_pk_bf16_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
 }
 
 let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -1908,6 +1912,8 @@ defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
 defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
 defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>;
 defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>;
+defm V_CVT_SCALEF32_PK_F16_FP8  : VOP3OpSel_Real_gfx9<0x248>;
+defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3OpSel_Real_gfx9<0x269>;
 }
 let OtherPredicates = [HasBF8ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
@@ -1916,6 +1922,8 @@ defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
 defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
 defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
 defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
+defm V_CVT_SCALEF32_PK_F16_BF8  : VOP3OpSel_Real_gfx9<0x249>;
+defm V_CVT_SCALEF32_PK_BF16_BF8 : VOP3OpSel_Real_gfx9<0x26a>;
 }
 let OtherPredicates = [HasFP4ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 1aef267537aa55..e505b6ff4ad58b 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -929,3 +929,99 @@ v_cvt_scalef32_pk32_fp6_bf16 v[20:25], v[10:25], v8
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00]
 v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3    ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_fp8 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3    ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_fp8 v1, v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3     ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_fp8 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3    ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_bf8 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3    ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_bf8 v1, v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3     ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_bf8 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3   ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3   ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3    ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3   ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3   ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3    ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0]
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index f81a240701d949..3163370f649100 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -244,4 +244,52 @@ v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 mul:2
 v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 div:2
 
 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
-v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp div:2
\ No newline at end of file
+v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_fp8 v[20:25], v[10:25], v8 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_bf8 v[20:25], v[10:25], v8 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_fp8 v[20:25], v[10:25], v8 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp div:2
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index 927366b9a410bc..99b5f6f84ea94d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -641,3 +641,75 @@
 
 # GFX950: v_cvt_scalef32_pk32_fp6_f16 v[20:25], v[10:25], v8 ; encoding: [0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00]
 0x14,0x00,0x58,0xd2,0x0a,0x11,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3    ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3    ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x00,0x48,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3     ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x48,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x48,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x08,0x48,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x08,0x48,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3    ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x49,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3    ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x00,0x49,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3     ; encoding: [0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x49,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x49,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x08,0x49,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x08,0x49,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3   ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x69,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3   ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x00,0x69,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3    ; encoding: [0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x69,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x69,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x08,0x69,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x08,0x69,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3   ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x6a,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3   ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x00,0x6a,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3    ; encoding: [0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x6a,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x6a,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x08,0x6a,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00



More information about the llvm-branch-commits mailing list