[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scale_fp4<->f32 of gfx950. (PR #117417)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 22 21:40:15 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117417
OPSEL ASM Syntax for v_cvt_scalef32_pk_f32_fp4 : opsel:[x,y,z]
where, x & y i.e. OPSEL[1 : 0] selects which src_byte to read.
OPSEL ASM Syntax for v_cvt_scalef32_pk_fp4_f32 : opsel:[a,b,c,d]
where, c & d i.e. OPSEL[3 : 2] selects which dst_byte to write.
Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>
>From f2ef9b191d96e9bf81d2ae251b62bc4ce89bd027 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Sun, 7 Apr 2024 04:30:50 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scale_fp4<->f32 of gfx950.
OPSEL ASM Syntax for v_cvt_scalef32_pk_f32_fp4 : opsel:[x,y,z]
where, x & y i.e. OPSEL[1 : 0] selects which src_byte to read.
OPSEL ASM Syntax for v_cvt_scalef32_pk_fp4_f32 : opsel:[a,b,c,d]
where, c & d i.e. OPSEL[3 : 2] selects which dst_byte to write.
Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 14 ++-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 +
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 21 ++--
llvm/test/MC/AMDGPU/gfx950_asm_features.s | 96 +++++++++++++++++++
llvm/test/MC/AMDGPU/gfx950_err.s | 24 +++++
.../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 72 ++++++++++++++
6 files changed, 222 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 2ebbd16aa65de6..64e88cf03b429b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -396,11 +396,17 @@ def FeatureBF8ConversionScaleInsts : SubtargetFeature<"bf8-cvt-scale-insts",
"Has bf8 conversion scale instructions"
>;
+def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts",
+ "HasFP4ConversionScaleInsts",
+ "true",
+ "Has fp4 conversion scale instructions"
+>;
+
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
"GFX950Insts",
"true",
"Additional instructions for GFX950+",
- [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts]
+ [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts]
>;
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
@@ -1545,7 +1551,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureBF16ConversionInsts,
FeatureBitOp3Insts,
FeatureFP8ConversionScaleInsts,
- FeatureBF8ConversionScaleInsts
+ FeatureBF8ConversionScaleInsts,
+ FeatureFP4ConversionScaleInsts
])>;
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2425,6 +2432,9 @@ def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInst
def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">,
AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>;
+def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
+ AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;
+
def HasGDS : Predicate<"Subtarget->hasGDS()">;
def HasGWS : Predicate<"Subtarget->hasGWS()">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ea00751cad1f24..20f573da0ec82b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -52,6 +52,7 @@ class AMDGPUSubtarget {
bool HasTrue16BitInsts = false;
bool HasFP8ConversionScaleInsts = false;
bool HasBF8ConversionScaleInsts = false;
+ bool HasFP4ConversionScaleInsts = false;
bool EnableRealTrue16Insts = false;
bool HasBF16ConversionInsts = false;
bool HasMadMixInsts = false;
@@ -181,6 +182,8 @@ class AMDGPUSubtarget {
bool hasBF8ConversionScaleInsts() const { return HasBF8ConversionScaleInsts; }
+ bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; }
+
bool hasMadMacF32Insts() const {
return HasMadMacF32Insts || !isGCN();
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 4425a26d06b12c..cc5255c1e6ef4d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -887,7 +887,7 @@ class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPPro
let HasOMod = 0;
}
-def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>,
+def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>,
VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -899,7 +899,7 @@ def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32,
let HasOMod = 0;
}
-def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
+def VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
VOP3_OPSEL> {
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -928,8 +928,8 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
- defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
- defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
+ defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
}
@@ -937,12 +937,17 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
- defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
- defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
+ defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
}
+let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
+ defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
+ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
+}
+
let SubtargetPredicate = isGFX10Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1881,3 +1886,7 @@ defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
}
+let OtherPredicates = [HasFP4ConversionScaleInsts] in {
+defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
+defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
+}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 8cdf5a0bfdad35..1c825c105eb215 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -693,3 +693,99 @@ v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00]
+v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3|
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1]
\ No newline at end of file
diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s
index 7db9c61197113d..56f4eff0a2793a 100644
--- a/llvm/test/MC/AMDGPU/gfx950_err.s
+++ b/llvm/test/MC/AMDGPU/gfx950_err.s
@@ -77,3 +77,27 @@ v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 mul:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2
+
+// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
+v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index 757e533f77f789..1eac7628a22e7f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -467,3 +467,75 @@
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00]
+0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00]
+0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00
+
+# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00]
+0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04]
+0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44]
+0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44
+
+# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
+0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02
More information about the llvm-branch-commits
mailing list