[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk_{fp|bf}8_{f|bf}16 of gfx950. (PR #117384)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 22 13:30:42 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117384
>From d5c3e0b8dd48841b1b23bb847919683d9a5d8506 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Sun, 7 Apr 2024 01:57:15 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_pk_{fp|bf}8_{f|bf}16 of
gfx950.
OPSEL ASM Syntax: opsel:[x,y,z]
where,
opsel[z] = Inst{14} = src0_modifier{3}
Note: Conventional Inst{13} i.e. OPSEL[2] is ignored in asm syntax.
Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 21 ++++
llvm/test/MC/AMDGPU/gfx950_asm_features.s | 96 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx950_err.s | 48 ++++++++++
.../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 72 ++++++++++++++
4 files changed, 237 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index b487fe9e46df0b..4425a26d06b12c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -912,11 +912,26 @@ def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32,
let HasOMod = 0;
}
+def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f16, f32, untyped]>,
+ VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+ op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasSrc2 = 0;
+ let HasSrc2Mods = 0;
+ let HasExtVOP3DPP = 0;
+ let HasOpSel = 1;
+ let HasOMod = 0;
+}
+
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
+ defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
}
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -924,6 +939,8 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
+ defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
}
let SubtargetPredicate = isGFX10Plus in {
@@ -1853,10 +1870,14 @@ defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
+defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>;
+defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>;
}
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
+defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
+defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 51ddffb84e64ee..8cdf5a0bfdad35 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -597,3 +597,99 @@ v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0]
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_fp8_f16 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_fp8_f16 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf8_f16 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf8_f16 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20]
+v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1]
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 3f9bf3beef3aac..7db9c61197113d 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -29,3 +29,51 @@ v_permlane16_swap_b32_e32 v1, v2 fi:0
// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction
v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 fi:1
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index fe87f7e2d28d29..757e533f77f789 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -395,3 +395,75 @@
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20]
+0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20
+
+# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00
More information about the llvm-branch-commits
mailing list