[llvm-branch-commits] [llvm] AMDGPU: MC support for v_cvt_scale_[f16|f32]_bf8 of gfx950. (PR #117381)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Nov 22 13:14:55 PST 2024


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117381

OPSEL ASM syntax: op_sel:[x,y,z]
where
    op_sel[x] = Inst{11} = src0_modifier{2}
    op_sel[y] = Inst{12} = src1_modifier{2}
    op_sel[z] = Inst{14} = src0_modifier{3}
Note: The conventional Inst{13}, i.e. OPSEL[2], is ignored in the asm syntax.

Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>

>From e69fd7d36b7ac6403f5fb692f8912c82a884c902 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Sat, 6 Apr 2024 07:19:15 -0400
Subject: [PATCH] AMDGPU: MC support for v_cvt_scale_[f16|f32]_bf8 of gfx950.

OPSEL ASM syntax: op_sel:[x,y,z]
where
    op_sel[x] = Inst{11} = src0_modifier{2}
    op_sel[y] = Inst{12} = src1_modifier{2}
    op_sel[z] = Inst{14} = src0_modifier{3}
Note: The conventional Inst{13}, i.e. OPSEL[2], is ignored in the asm syntax.

Co-authored-by: Pravin Jagtap <Pravin.Jagtap at amd.com>
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  14 +-
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h      |   3 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |   9 +
 llvm/test/MC/AMDGPU/gfx950_asm_features.s     | 192 ++++++++++++++++++
 .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt  | 144 +++++++++++++
 5 files changed, 360 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5a01001fd906d3..2ebbd16aa65de6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -390,11 +390,17 @@ def FeatureFP8ConversionScaleInsts : SubtargetFeature<"fp8-cvt-scale-insts",
   "Has fp8 conversion scale instructions"
 >;
 
+def FeatureBF8ConversionScaleInsts : SubtargetFeature<"bf8-cvt-scale-insts",
+  "HasBF8ConversionScaleInsts",
+  "true",
+  "Has bf8 conversion scale instructions"
+>;
+
 def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
   "GFX950Insts",
   "true",
   "Additional instructions for GFX950+",
-  [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts]
+  [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts]
 >;
 
 def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
@@ -1538,7 +1544,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
    FeaturePrngInst,
    FeatureBF16ConversionInsts,
    FeatureBitOp3Insts,
-   FeatureFP8ConversionScaleInsts
+   FeatureFP8ConversionScaleInsts,
+   FeatureBF8ConversionScaleInsts
    ])>;
 
 def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2415,6 +2422,9 @@ def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
 def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">,
   AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>;
 
+def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">,
+  AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>;
+
 def HasGDS : Predicate<"Subtarget->hasGDS()">;
 
 def HasGWS : Predicate<"Subtarget->hasGWS()">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c5aaed3a6b9ae8..ea00751cad1f24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -51,6 +51,7 @@ class AMDGPUSubtarget {
   bool Has16BitInsts = false;
   bool HasTrue16BitInsts = false;
   bool HasFP8ConversionScaleInsts = false;
+  bool HasBF8ConversionScaleInsts = false;
   bool EnableRealTrue16Insts = false;
   bool HasBF16ConversionInsts = false;
   bool HasMadMixInsts = false;
@@ -178,6 +179,8 @@ class AMDGPUSubtarget {
 
   bool hasFP8ConversionScaleInsts() const { return HasFP8ConversionScaleInsts; }
 
+  bool hasBF8ConversionScaleInsts() const { return HasBF8ConversionScaleInsts; }
+
   bool hasMadMacF32Insts() const {
     return HasMadMacF32Insts || !isGCN();
   }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 2534f26492e6ea..89c6a3f09d78c2 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -892,6 +892,11 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
   defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
 }
 
+let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
+  defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
+  defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
+}
+
 let SubtargetPredicate = isGFX10Plus in {
   let isCommutable = 1, isReMaterializable = 1 in {
     defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1818,3 +1823,7 @@ let OtherPredicates = [HasFP8ConversionScaleInsts] in {
 defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
 defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
 }
+let OtherPredicates = [HasBF8ConversionScaleInsts] in {
+defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
+defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
+}
diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
index 55ca8f94600995..ea60735deb888b 100644
--- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s
@@ -309,3 +309,195 @@ v_cvt_scalef32_f32_fp8 v1, 33, v3 op_sel:[0,1,1]
 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
 // GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00]
 v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3       ; encoding: [0x01,0x00,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3       ; encoding: [0x01,0x00,0x4b,0xd2,0x01,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s1, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x03,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x04,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x01,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s1, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x03,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x04,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3       ; encoding: [0x01,0x00,0x4b,0xd2,0x8b,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 11, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x96,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0xa1,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0xac,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x8b,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 11, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x96,0x06,0x02,0x00]
+ v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0xa1,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0xac,0x06,0x02,0x00]
+v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3       ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00]
+ v_cvt_scalef32_f32_bf8 v1, v2, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x02,0x07,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3       ; encoding: [0x01,0x00,0x3c,0xd2,0x01,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s1, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x03,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x04,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x01,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s1, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x03,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x04,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3       ; encoding: [0x01,0x00,0x3c,0xd2,0x8b,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 11, v3
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x96,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0xa1,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0xac,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,0]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x8b,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 11, v3 op_sel:[0,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x96,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0xa1,0x06,0x02,0x00]
+ v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,1]
+
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
+// GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00]
+v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
index 063fc70e04b5c6..395910812f18e5 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt
@@ -179,3 +179,147 @@
 
 # GFX950: v_cvt_scalef32_f32_fp8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00]
 0x01,0x58,0x3b,0xd2,0xac,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3       ; encoding: [0x01,0x00,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x10,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x18,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x40,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x48,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x50,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x58,0x4b,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3       ; encoding: [0x01,0x00,0x4b,0xd2,0x01,0x06,0x02,0x00]
+0x01,0x00,0x4b,0xd2,0x01,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x02,0x06,0x02,0x00]
+0x01,0x08,0x4b,0xd2,0x02,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0x03,0x06,0x02,0x00]
+0x01,0x10,0x4b,0xd2,0x03,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0x04,0x06,0x02,0x00]
+0x01,0x18,0x4b,0xd2,0x04,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x4b,0xd2,0x01,0x06,0x02,0x00]
+0x01,0x40,0x4b,0xd2,0x01,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x02,0x06,0x02,0x00]
+0x01,0x48,0x4b,0xd2,0x02,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x03,0x06,0x02,0x00]
+0x01,0x50,0x4b,0xd2,0x03,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0x04,0x06,0x02,0x00]
+0x01,0x58,0x4b,0xd2,0x04,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3       ; encoding: [0x01,0x00,0x4b,0xd2,0x8b,0x06,0x02,0x00]
+0x01,0x00,0x4b,0xd2,0x8b,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x4b,0xd2,0x96,0x06,0x02,0x00]
+0x01,0x08,0x4b,0xd2,0x96,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x4b,0xd2,0xa1,0x06,0x02,0x00]
+0x01,0x10,0x4b,0xd2,0xa1,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x4b,0xd2,0xac,0x06,0x02,0x00]
+0x01,0x18,0x4b,0xd2,0xac,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 11, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0x8b,0x06,0x02,0x00]
+0x01,0x50,0x4b,0xd2,0x8b,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x4b,0xd2,0x96,0x06,0x02,0x00]
+0x01,0x48,0x4b,0xd2,0x96,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x4b,0xd2,0xa1,0x06,0x02,0x00]
+0x01,0x50,0x4b,0xd2,0xa1,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f16_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x4b,0xd2,0xac,0x06,0x02,0x00]
+0x01,0x58,0x4b,0xd2,0xac,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3       ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x08,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x10,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x18,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x40,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x48,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x50,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x02,0x07,0x02,0x00]
+0x01,0x58,0x3c,0xd2,0x02,0x07,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3       ; encoding: [0x01,0x00,0x3c,0xd2,0x01,0x06,0x02,0x00]
+0x01,0x00,0x3c,0xd2,0x01,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x02,0x06,0x02,0x00]
+0x01,0x08,0x3c,0xd2,0x02,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0x03,0x06,0x02,0x00]
+0x01,0x10,0x3c,0xd2,0x03,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0x04,0x06,0x02,0x00]
+0x01,0x18,0x3c,0xd2,0x04,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s1, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x01,0x06,0x02,0x00]
+0x01,0x40,0x3c,0xd2,0x01,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s2, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x02,0x06,0x02,0x00]
+0x01,0x48,0x3c,0xd2,0x02,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s3, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0x03,0x06,0x02,0x00]
+0x01,0x50,0x3c,0xd2,0x03,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, s4, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0x04,0x06,0x02,0x00]
+0x01,0x58,0x3c,0xd2,0x04,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3       ; encoding: [0x01,0x00,0x3c,0xd2,0x8b,0x06,0x02,0x00]
+0x01,0x00,0x3c,0xd2,0x8b,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x3c,0xd2,0x96,0x06,0x02,0x00]
+0x01,0x08,0x3c,0xd2,0x96,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x3c,0xd2,0xa1,0x06,0x02,0x00]
+0x01,0x10,0x3c,0xd2,0xa1,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x3c,0xd2,0xac,0x06,0x02,0x00]
+0x01,0x18,0x3c,0xd2,0xac,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 11, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x3c,0xd2,0x8b,0x06,0x02,0x00]
+0x01,0x40,0x3c,0xd2,0x8b,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1] ; encoding: [0x01,0x48,0x3c,0xd2,0x96,0x06,0x02,0x00]
+0x01,0x48,0x3c,0xd2,0x96,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 33, v3 op_sel:[0,1,1] ; encoding: [0x01,0x50,0x3c,0xd2,0xa1,0x06,0x02,0x00]
+0x01,0x50,0x3c,0xd2,0xa1,0x06,0x02,0x00
+
+# GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00]
+0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00



More information about the llvm-branch-commits mailing list