[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scalef32_pk_{f|bf}16_fp4 of gfx950. (PR #117418)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 22 21:42:06 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
OPSEL ASM Syntax for v_cvt_scalef32_pk_{f|bf}16_fp4 : opsel:[x,y,z]
where, x & y i.e. OPSEL[1 : 0] selects which src_byte to read.
Note: Conventional Inst{13} i.e. OPSEL[2] is ignored in asm syntax.
Co-authored-by: Pravin Jagtap <Pravin.Jagtap@<!-- -->amd.com>
---
Full diff: https://github.com/llvm/llvm-project/pull/117418.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+8-4)
- (modified) llvm/test/MC/AMDGPU/gfx950_asm_features.s (+97-1)
- (modified) llvm/test/MC/AMDGPU/gfx950_err.s (+24)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt (+72)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index cc5255c1e6ef4d..cf00910210e0bd 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -899,7 +899,7 @@ def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f3
let HasOMod = 0;
}
-def VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
+class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -929,7 +929,7 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
- defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
}
@@ -938,14 +938,16 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
- defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
}
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
- defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
+ defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
+ defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
}
let SubtargetPredicate = isGFX10Plus in {
@@ -1889,4 +1891,6 @@ defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
let OtherPredicates = [HasFP4ConversionScaleInsts] in {
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
+defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
+defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 1c825c105eb215..85cd02aa714175 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -788,4 +788,100 @@ v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1]
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
-v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1]
\ No newline at end of file
+v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]
\ No newline at end of file
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 56f4eff0a2793a..89167ae35e2967 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -101,3 +101,27 @@ v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index 1eac7628a22e7f..80b5835fab1084 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -539,3 +539,75 @@
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00]
+0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
+0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00
``````````
</details>
https://github.com/llvm/llvm-project/pull/117418
More information about the llvm-branch-commits
mailing list