[llvm] bc1f85d - AMDGPU: Support packed bf16 instructions on gfx1250 (#150283)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 23 12:01:27 PDT 2025


Author: Changpeng Fang
Date: 2025-07-23T12:01:23-07:00
New Revision: bc1f85d234a8e8e4d1bcfb2126e7c8ec8c8f5b3d

URL: https://github.com/llvm/llvm-project/commit/bc1f85d234a8e8e4d1bcfb2126e7c8ec8c8f5b3d
DIFF: https://github.com/llvm/llvm-project/commit/bc1f85d234a8e8e4d1bcfb2126e7c8ec8c8f5b3d.diff

LOG: AMDGPU: Support packed bf16 instructions on gfx1250 (#150283)

Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/test/CodeGen/AMDGPU/bf16-math.ll
    llvm/test/MC/AMDGPU/gfx1250_asm_vop3p.s
    llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3p.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index e4e7bdce950ac..1c7ee724fef09 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -559,6 +559,12 @@ def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts",
   "Has bf16 conversion instructions"
 >;
 
+def FeatureBF16PackedInsts : SubtargetFeature<"bf16-pk-insts",
+  "HasBF16PackedInsts",
+  "true",
+  "Has bf16 packed instructions (fma, add, mul, max, min)"
+>;
+
 def FeatureVOP3P : SubtargetFeature<"vop3p",
   "HasVOP3PInsts",
   "true",
@@ -1989,6 +1995,7 @@ def FeatureISAVersion12_50 : FeatureSet<
    FeatureTransposeLoadF4F6Insts,
    FeatureBF16TransInsts,
    FeatureBF16ConversionInsts,
+   FeatureBF16PackedInsts,
    FeatureCvtPkF16F32Inst,
    FeatureMinimum3Maximum3PKF16,
    FeaturePrngInst,
@@ -2472,6 +2479,9 @@ def HasBF16TransInsts : Predicate<"Subtarget->hasBF16TransInsts()">,
 def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
   AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;
 
+def HasBF16PackedInsts : Predicate<"Subtarget->hasBF16PackedInsts()">,
+  AssemblerPredicate<(all_of FeatureBF16PackedInsts)>;
+
 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
   AssemblerPredicate<(all_of FeatureVOP3P)>;
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 1e44be8e47201..6878744496cfe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -61,6 +61,7 @@ class AMDGPUSubtarget {
   bool EnableRealTrue16Insts = false;
   bool HasBF16TransInsts = false;
   bool HasBF16ConversionInsts = false;
+  bool HasBF16PackedInsts = false;
   bool HasMadMixInsts = false;
   bool HasMadMacF32Insts = false;
   bool HasDsSrc2Insts = false;
@@ -209,6 +210,8 @@ class AMDGPUSubtarget {
     return HasBF16ConversionInsts;
   }
 
+  bool hasBF16PackedInsts() const { return HasBF16PackedInsts; }
+
   bool hasMadMixInsts() const {
     return HasMadMixInsts;
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d65c3ae76566b..441034b508c10 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -944,6 +944,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
   }
 
+  if (Subtarget->hasBF16PackedInsts()) {
+    setOperationAction(
+        {ISD::FADD, ISD::FMUL, ISD::FMINNUM, ISD::FMAXNUM, ISD::FMA},
+        MVT::v2bf16, Legal);
+  }
+
   if (Subtarget->hasBF16TransInsts()) {
     setOperationAction({ISD::FEXP2, ISD::FLOG2, ISD::FSQRT}, MVT::bf16, Legal);
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index bd4995b3c6e6f..b8537513ce986 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2873,10 +2873,12 @@ def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
 
 def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
 def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
+def VOP_V2BF16_V2BF16_V2BF16 : VOPProfile <[v2bf16, v2bf16, v2bf16, untyped]>;
 def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
 
 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+def VOP_V2BF16_V2BF16_V2BF16_V2BF16 : VOPProfile <[v2bf16, v2bf16, v2bf16, v2bf16]>;
 def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
 def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
 

diff  --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index ed8e547419ceb..43f9bc946e6fe 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1196,6 +1196,14 @@ let isCommutable = 1, isReMaterializable = 1 in {
 
   let SubtargetPredicate = HasPkMovB32, isAsCheapAsAMove = 1 in
   defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
+
+  let SubtargetPredicate = HasBF16PackedInsts in {
+    defm V_PK_ADD_BF16     : VOP3PInst<"v_pk_add_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fadd>;
+    defm V_PK_MUL_BF16     : VOP3PInst<"v_pk_mul_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fmul>;
+    defm V_PK_MIN_NUM_BF16 : VOP3PInst<"v_pk_min_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fminnum_like>;
+    defm V_PK_MAX_NUM_BF16 : VOP3PInst<"v_pk_max_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fmaxnum_like>;
+    defm V_PK_FMA_BF16     : VOP3PInst<"v_pk_fma_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fma>;
+  }
 } // End isCommutable = 1, isReMaterializable = 1
 
 def : AMDGPUMnemonicAlias<"v_accvgpr_read",  "v_accvgpr_read_b32">;
@@ -2222,6 +2230,11 @@ defm V_PK_MAX3_I16     : VOP3P_Real_gfx1250<0x2f>;
 defm V_PK_MAX3_U16     : VOP3P_Real_gfx1250<0x30>;
 defm V_PK_MIN3_I16     : VOP3P_Real_gfx1250<0x31>;
 defm V_PK_MIN3_U16     : VOP3P_Real_gfx1250<0x32>;
+defm V_PK_FMA_BF16     : VOP3P_Real_gfx1250<0x11>;
+defm V_PK_ADD_BF16     : VOP3P_Real_gfx1250<0x23>;
+defm V_PK_MUL_BF16     : VOP3P_Real_gfx1250<0x2a>;
+defm V_PK_MIN_NUM_BF16 : VOP3P_Real_gfx1250<0x2b>;
+defm V_PK_MAX_NUM_BF16 : VOP3P_Real_gfx1250<0x2c>;
 
 defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
 defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;

diff  --git a/llvm/test/CodeGen/AMDGPU/bf16-math.ll b/llvm/test/CodeGen/AMDGPU/bf16-math.ll
index b49614d05700a..1adf5423356a2 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16-math.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16-math.ll
@@ -24,6 +24,364 @@ define amdgpu_ps void @llvm_sqrt_bf16_s(ptr addrspace(1) %out, bfloat inreg %src
   ret void
 }
 
+define amdgpu_ps void @v_test_add_v2bf16_vv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b) {
+; GCN-LABEL: v_test_add_v2bf16_vv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, v3
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fadd <2 x bfloat> %a, %b
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_add_v2bf16_vs(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_add_v2bf16_vs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, s0
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fadd <2 x bfloat> %a, %b
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_add_v2bf16_ss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_add_v2bf16_ss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, s0, s1
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fadd <2 x bfloat> %a, %b
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_add_v2bf16_vc(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_add_v2bf16_vc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, 2.0 op_sel_hi:[1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fadd <2 x bfloat> %a, <bfloat 2.0, bfloat 2.0>
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_add_v2bf16_vl(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_add_v2bf16_vl:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, 0x42c83f80, v2
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fadd <2 x bfloat> %a, <bfloat 1.0, bfloat 100.0>
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_vv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b) {
+; GCN-LABEL: v_test_sub_v2bf16_vv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> %a, %b
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_vs(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_sub_v2bf16_vs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, s0 neg_lo:[0,1] neg_hi:[0,1]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> %a, %b
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_ss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_sub_v2bf16_ss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, s0, s1 neg_lo:[0,1] neg_hi:[0,1]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> %a, %b
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_vc(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_sub_v2bf16_vc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, -2.0 op_sel_hi:[1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> %a, <bfloat 2.0, bfloat 2.0>
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_vl(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_sub_v2bf16_vl:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, 0xc2c8bf80, v2
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> %a, <bfloat 1.0, bfloat 100.0>
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_lv(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_sub_v2bf16_lv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, 0x42c83f80, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> <bfloat 1.0, bfloat 100.0>, %a
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_sub_v2bf16_iv(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_sub_v2bf16_iv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_bf16 v2, v2, 1.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %add = fsub <2 x bfloat> <bfloat 1.0, bfloat 1.0>, %a
+  store <2 x bfloat> %add, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_mul_v2bf16_vv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b) {
+; GCN-LABEL: v_test_mul_v2bf16_vv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_mul_bf16 v2, v2, v3
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %mul = fmul <2 x bfloat> %a, %b
+  store <2 x bfloat> %mul, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_mul_v2bf16_vs(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_mul_v2bf16_vs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_mul_bf16 v2, v2, s0
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %mul = fmul <2 x bfloat> %a, %b
+  store <2 x bfloat> %mul, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_mul_v2bf16_ss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_mul_v2bf16_ss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_mul_bf16 v2, s0, s1
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %mul = fmul <2 x bfloat> %a, %b
+  store <2 x bfloat> %mul, ptr addrspace(1) %out
+  ret void
+}
+
+; FIXME: We can do better folding inline constant instead of a literal.
+
+define amdgpu_ps void @v_test_mul_v2bf16_vc(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_mul_v2bf16_vc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_mul_bf16 v2, v2, 0.5 op_sel_hi:[1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %mul = fmul <2 x bfloat> %a, <bfloat 0.5, bfloat 0.5>
+  store <2 x bfloat> %mul, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_mul_v2bf16_vl(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_mul_v2bf16_vl:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_mul_bf16 v2, 0x42c83f80, v2
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %mul = fmul <2 x bfloat> %a, <bfloat 1.0, bfloat 100.0>
+  store <2 x bfloat> %mul, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_min_v2bf16_vv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b) {
+; GCN-LABEL: v_test_min_v2bf16_vv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_min_num_bf16 v2, v2, v3
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %min = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <2 x bfloat> %min, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_min_v2bf16_vs(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_min_v2bf16_vs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_min_num_bf16 v2, v2, s0
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %min = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <2 x bfloat> %min, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_min_v2bf16_ss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_min_v2bf16_ss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_min_num_bf16 v2, s0, s1
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %min = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <2 x bfloat> %min, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_min_v2bf16_vc(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_min_v2bf16_vc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_min_num_bf16 v2, v2, 0.5 op_sel_hi:[1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %min = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> <bfloat 0.5, bfloat 0.5>)
+  store <2 x bfloat> %min, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_min_v2bf16_vl(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_min_v2bf16_vl:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_min_num_bf16 v2, 0x42c83f80, v2
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %min = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> <bfloat 1.0, bfloat 100.0>)
+  store <2 x bfloat> %min, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_max_v2bf16_vv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b) {
+; GCN-LABEL: v_test_max_v2bf16_vv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_max_num_bf16 v2, v2, v3
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <2 x bfloat> %max, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_max_v2bf16_vs(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_max_v2bf16_vs:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_max_num_bf16 v2, v2, s0
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <2 x bfloat> %max, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_max_v2bf16_ss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_max_v2bf16_ss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_max_num_bf16 v2, s0, s1
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  store <2 x bfloat> %max, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_max_v2bf16_vc(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_max_v2bf16_vc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_max_num_bf16 v2, v2, 0.5 op_sel_hi:[1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> <bfloat 0.5, bfloat 0.5>)
+  store <2 x bfloat> %max, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_max_v2bf16_vl(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_max_v2bf16_vl:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_max_num_bf16 v2, 0x42c83f80, v2
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> <bfloat 1.0, bfloat 100.0>)
+  store <2 x bfloat> %max, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_fma_v2bf16_vvv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
+; GCN-LABEL: v_test_fma_v2bf16_vvv:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_fma_bf16 v2, v2, v3, v4
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %fma = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
+  store <2 x bfloat> %fma, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_fma_v2bf16_vss(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b, <2 x bfloat> inreg %c) {
+; GCN-LABEL: v_test_fma_v2bf16_vss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_fma_bf16 v2, v2, s0, s1
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %fma = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
+  store <2 x bfloat> %fma, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_fma_v2bf16_sss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b, <2 x bfloat> inreg %c) {
+; GCN-LABEL: v_test_fma_v2bf16_sss:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_mov_b32_e32 v2, s2
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_pk_fma_bf16 v2, s0, s1, v2
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %fma = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c)
+  store <2 x bfloat> %fma, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_fma_v2bf16_vsc(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
+; GCN-LABEL: v_test_fma_v2bf16_vsc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_fma_bf16 v2, v2, s0, 0.5 op_sel_hi:[1,1,0]
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %fma = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> <bfloat 0.5, bfloat 0.5>)
+  store <2 x bfloat> %fma, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_test_fma_v2bf16_vll(ptr addrspace(1) %out, <2 x bfloat> %a) {
+; GCN-LABEL: v_test_fma_v2bf16_vll:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_mov_b32 s0, 0x42c83f80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_pk_fma_bf16 v2, v2, s0, 0x43484000
+; GCN-NEXT:    global_store_b32 v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %fma = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %a, <2 x bfloat> <bfloat 1.0, bfloat 100.0>, <2 x bfloat> <bfloat 2.0, bfloat 200.0>)
+  store <2 x bfloat> %fma, ptr addrspace(1) %out
+  ret void
+}
+
 define amdgpu_ps void @llvm_log2_bf16_v(ptr addrspace(1) %out, bfloat %src) {
 ; GCN-LABEL: llvm_log2_bf16_v:
 ; GCN:       ; %bb.0:
@@ -68,6 +426,9 @@ define amdgpu_ps void @llvm_exp2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src
   ret void
 }
 
+declare <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+declare <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>)
 declare bfloat @llvm.sqrt.bf16(bfloat)
 declare bfloat @llvm.log2.bf16(bfloat)
 declare bfloat @llvm.exp2.bf16(bfloat)

diff  --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3p.s
index 4fd3d0d2755a1..4b1a6e655b4db 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3p.s
@@ -725,3 +725,303 @@ v_pk_max3_u16 v10, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,1]
 v_pk_max3_u16 v10, s1, 100, v3 op_sel:[1,0,0] op_sel_hi:[0,1,1] clamp
 // GFX1250: v_pk_max3_u16 v10, s1, 0x64, v3 op_sel:[1,0,0] op_sel_hi:[0,1,1] clamp ; encoding: [0x0a,0xc8,0x30,0xcc,0x01,0xfe,0x0d,0x14,0x64,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, v1, v2
+// GFX1250: v_pk_add_bf16 v5, v1, v2                ; encoding: [0x05,0x40,0x23,0xcc,0x01,0x05,0x02,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, v255, v255
+// GFX1250: v_pk_add_bf16 v5, v255, v255            ; encoding: [0x05,0x40,0x23,0xcc,0xff,0xff,0x03,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, s1, s2
+// GFX1250: v_pk_add_bf16 v5, s1, s2                ; encoding: [0x05,0x40,0x23,0xcc,0x01,0x04,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, s105, s105
+// GFX1250: v_pk_add_bf16 v5, s105, s105            ; encoding: [0x05,0x40,0x23,0xcc,0x69,0xd2,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, vcc_lo, ttmp15
+// GFX1250: v_pk_add_bf16 v5, vcc_lo, ttmp15        ; encoding: [0x05,0x40,0x23,0xcc,0x6a,0xf6,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, vcc_hi, 0xfe0b
+// GFX1250: v_pk_add_bf16 v5, vcc_hi, 0xfe0b        ; encoding: [0x05,0x40,0x23,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, ttmp15, src_scc
+// GFX1250: v_pk_add_bf16 v5, ttmp15, src_scc       ; encoding: [0x05,0x40,0x23,0xcc,0x7b,0xfa,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, m0, 0.5
+// GFX1250: v_pk_add_bf16 v5, m0, 0.5               ; encoding: [0x05,0x40,0x23,0xcc,0x7d,0xe0,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, exec_lo, -1
+// GFX1250: v_pk_add_bf16 v5, exec_lo, -1           ; encoding: [0x05,0x40,0x23,0xcc,0x7e,0x82,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, exec_hi, null
+// GFX1250: v_pk_add_bf16 v5, exec_hi, null         ; encoding: [0x05,0x40,0x23,0xcc,0x7f,0xf8,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, null, exec_lo
+// GFX1250: v_pk_add_bf16 v5, null, exec_lo         ; encoding: [0x05,0x40,0x23,0xcc,0x7c,0xfc,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX1250: v_pk_add_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x23,0xcc,0xc1,0xfe,0x00,0x20]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX1250: v_pk_add_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x23,0xcc,0xf0,0xfa,0x00,0x58]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX1250: v_pk_add_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x23,0xcc,0xfd,0xd4,0x00,0x10]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_add_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX1250: v_pk_add_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x23,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, v1, v2
+// GFX1250: v_pk_mul_bf16 v5, v1, v2                ; encoding: [0x05,0x40,0x2a,0xcc,0x01,0x05,0x02,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, v255, v255
+// GFX1250: v_pk_mul_bf16 v5, v255, v255            ; encoding: [0x05,0x40,0x2a,0xcc,0xff,0xff,0x03,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, s1, s2
+// GFX1250: v_pk_mul_bf16 v5, s1, s2                ; encoding: [0x05,0x40,0x2a,0xcc,0x01,0x04,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, s105, s105
+// GFX1250: v_pk_mul_bf16 v5, s105, s105            ; encoding: [0x05,0x40,0x2a,0xcc,0x69,0xd2,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, vcc_lo, ttmp15
+// GFX1250: v_pk_mul_bf16 v5, vcc_lo, ttmp15        ; encoding: [0x05,0x40,0x2a,0xcc,0x6a,0xf6,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, vcc_hi, 0xfe0b
+// GFX1250: v_pk_mul_bf16 v5, vcc_hi, 0xfe0b        ; encoding: [0x05,0x40,0x2a,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, ttmp15, src_scc
+// GFX1250: v_pk_mul_bf16 v5, ttmp15, src_scc       ; encoding: [0x05,0x40,0x2a,0xcc,0x7b,0xfa,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, m0, 0.5
+// GFX1250: v_pk_mul_bf16 v5, m0, 0.5               ; encoding: [0x05,0x40,0x2a,0xcc,0x7d,0xe0,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, exec_lo, -1
+// GFX1250: v_pk_mul_bf16 v5, exec_lo, -1           ; encoding: [0x05,0x40,0x2a,0xcc,0x7e,0x82,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, exec_hi, null
+// GFX1250: v_pk_mul_bf16 v5, exec_hi, null         ; encoding: [0x05,0x40,0x2a,0xcc,0x7f,0xf8,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, null, exec_lo
+// GFX1250: v_pk_mul_bf16 v5, null, exec_lo         ; encoding: [0x05,0x40,0x2a,0xcc,0x7c,0xfc,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX1250: v_pk_mul_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x2a,0xcc,0xc1,0xfe,0x00,0x20]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX1250: v_pk_mul_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x2a,0xcc,0xf0,0xfa,0x00,0x58]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX1250: v_pk_mul_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x2a,0xcc,0xfd,0xd4,0x00,0x10]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_mul_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX1250: v_pk_mul_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x2a,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, v1, v2
+// GFX1250: v_pk_max_num_bf16 v5, v1, v2            ; encoding: [0x05,0x40,0x2c,0xcc,0x01,0x05,0x02,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, v255, v255
+// GFX1250: v_pk_max_num_bf16 v5, v255, v255        ; encoding: [0x05,0x40,0x2c,0xcc,0xff,0xff,0x03,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, s1, s2
+// GFX1250: v_pk_max_num_bf16 v5, s1, s2            ; encoding: [0x05,0x40,0x2c,0xcc,0x01,0x04,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, s105, s105
+// GFX1250: v_pk_max_num_bf16 v5, s105, s105        ; encoding: [0x05,0x40,0x2c,0xcc,0x69,0xd2,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, vcc_lo, ttmp15
+// GFX1250: v_pk_max_num_bf16 v5, vcc_lo, ttmp15    ; encoding: [0x05,0x40,0x2c,0xcc,0x6a,0xf6,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, vcc_hi, 0xfe0b
+// GFX1250: v_pk_max_num_bf16 v5, vcc_hi, 0xfe0b    ; encoding: [0x05,0x40,0x2c,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, ttmp15, src_scc
+// GFX1250: v_pk_max_num_bf16 v5, ttmp15, src_scc   ; encoding: [0x05,0x40,0x2c,0xcc,0x7b,0xfa,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, m0, 0.5
+// GFX1250: v_pk_max_num_bf16 v5, m0, 0.5           ; encoding: [0x05,0x40,0x2c,0xcc,0x7d,0xe0,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, exec_lo, -1
+// GFX1250: v_pk_max_num_bf16 v5, exec_lo, -1       ; encoding: [0x05,0x40,0x2c,0xcc,0x7e,0x82,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, exec_hi, null
+// GFX1250: v_pk_max_num_bf16 v5, exec_hi, null     ; encoding: [0x05,0x40,0x2c,0xcc,0x7f,0xf8,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, null, exec_lo
+// GFX1250: v_pk_max_num_bf16 v5, null, exec_lo     ; encoding: [0x05,0x40,0x2c,0xcc,0x7c,0xfc,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX1250: v_pk_max_num_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x2c,0xcc,0xc1,0xfe,0x00,0x20]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX1250: v_pk_max_num_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x2c,0xcc,0xf0,0xfa,0x00,0x58]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX1250: v_pk_max_num_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x2c,0xcc,0xfd,0xd4,0x00,0x10]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_max_num_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX1250: v_pk_max_num_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x2c,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, v1, v2
+// GFX1250: v_pk_min_num_bf16 v5, v1, v2            ; encoding: [0x05,0x40,0x2b,0xcc,0x01,0x05,0x02,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, v255, v255
+// GFX1250: v_pk_min_num_bf16 v5, v255, v255        ; encoding: [0x05,0x40,0x2b,0xcc,0xff,0xff,0x03,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, s1, s2
+// GFX1250: v_pk_min_num_bf16 v5, s1, s2            ; encoding: [0x05,0x40,0x2b,0xcc,0x01,0x04,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, s105, s105
+// GFX1250: v_pk_min_num_bf16 v5, s105, s105        ; encoding: [0x05,0x40,0x2b,0xcc,0x69,0xd2,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, vcc_lo, ttmp15
+// GFX1250: v_pk_min_num_bf16 v5, vcc_lo, ttmp15    ; encoding: [0x05,0x40,0x2b,0xcc,0x6a,0xf6,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, vcc_hi, 0xfe0b
+// GFX1250: v_pk_min_num_bf16 v5, vcc_hi, 0xfe0b    ; encoding: [0x05,0x40,0x2b,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, ttmp15, src_scc
+// GFX1250: v_pk_min_num_bf16 v5, ttmp15, src_scc   ; encoding: [0x05,0x40,0x2b,0xcc,0x7b,0xfa,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, m0, 0.5
+// GFX1250: v_pk_min_num_bf16 v5, m0, 0.5           ; encoding: [0x05,0x40,0x2b,0xcc,0x7d,0xe0,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, exec_lo, -1
+// GFX1250: v_pk_min_num_bf16 v5, exec_lo, -1       ; encoding: [0x05,0x40,0x2b,0xcc,0x7e,0x82,0x01,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, exec_hi, null
+// GFX1250: v_pk_min_num_bf16 v5, exec_hi, null     ; encoding: [0x05,0x40,0x2b,0xcc,0x7f,0xf8,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, null, exec_lo
+// GFX1250: v_pk_min_num_bf16 v5, null, exec_lo     ; encoding: [0x05,0x40,0x2b,0xcc,0x7c,0xfc,0x00,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0]
+// GFX1250: v_pk_min_num_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x2b,0xcc,0xc1,0xfe,0x00,0x20]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1]
+// GFX1250: v_pk_min_num_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x2b,0xcc,0xf0,0xfa,0x00,0x58]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0]
+// GFX1250: v_pk_min_num_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x2b,0xcc,0xfd,0xd4,0x00,0x10]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_min_num_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp
+// GFX1250: v_pk_min_num_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x2b,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, v1, v2, s3
+// GFX1250: v_pk_fma_bf16 v5, v1, v2, s3            ; encoding: [0x05,0x40,0x11,0xcc,0x01,0x05,0x0e,0x18]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, v255, s2, s105
+// GFX1250: v_pk_fma_bf16 v5, v255, s2, s105        ; encoding: [0x05,0x40,0x11,0xcc,0xff,0x05,0xa4,0x19]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, s1, v255, exec_hi
+// GFX1250: v_pk_fma_bf16 v5, s1, v255, exec_hi     ; encoding: [0x05,0x40,0x11,0xcc,0x01,0xfe,0xff,0x19]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, s105, s105, exec_lo
+// GFX1250: v_pk_fma_bf16 v5, s105, s105, exec_lo   ; encoding: [0x05,0x40,0x11,0xcc,0x69,0xd2,0xf8,0x19]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, vcc_lo, ttmp15, v3
+// GFX1250: v_pk_fma_bf16 v5, vcc_lo, ttmp15, v3    ; encoding: [0x05,0x40,0x11,0xcc,0x6a,0xf6,0x0c,0x1c]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, vcc_hi, 0xfe0b, v255
+// GFX1250: v_pk_fma_bf16 v5, vcc_hi, 0xfe0b, v255  ; encoding: [0x05,0x40,0x11,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, ttmp15, src_scc, ttmp15
+// GFX1250: v_pk_fma_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x11,0xcc,0x7b,0xfa,0xed,0x19]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
+// GFX1250: v_pk_fma_bf16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x11,0xcc,0x7d,0xe0,0xf5,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
+// GFX1250: v_pk_fma_bf16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1] ; encoding: [0x05,0x40,0x11,0xcc,0x7e,0x82,0xad,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, exec_hi, null, vcc_lo op_sel_hi:[0,1,0]
+// GFX1250: v_pk_fma_bf16 v5, exec_hi, null, vcc_lo op_sel_hi:[0,1,0] ; encoding: [0x05,0x00,0x11,0xcc,0x7f,0xf8,0xa8,0x11]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1] op_sel_hi:[1,0,0] neg_lo:[1,0,0] neg_hi:[1,0,0]
+// GFX1250: v_pk_fma_bf16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1] op_sel_hi:[1,0,0] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x05,0x39,0x11,0xcc,0x7c,0xfc,0xfc,0x2b,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,1,0] neg_hi:[0,1,0]
+// GFX1250: v_pk_fma_bf16 v5, -1, exec_hi, src_scc neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x05,0x42,0x11,0xcc,0xc1,0xfe,0xf4,0x5b]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+// GFX1250: v_pk_fma_bf16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x05,0x4c,0x11,0xcc,0xf0,0xfa,0xc0,0x93]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+// GFX1250: v_pk_fma_bf16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x11,0xcc,0xfd,0xd4,0x04,0x0b]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_pk_fma_bf16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[1,1,1] neg_hi:[1,1,1] clamp
+// GFX1250: v_pk_fma_bf16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[1,1,1] neg_hi:[1,1,1] clamp ; encoding: [0xff,0xa7,0x11,0xcc,0xff,0xd6,0xf0,0xf9,0x0b,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3p.txt
index 5f424902efa59..921db7e22a598 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3p.txt
@@ -500,3 +500,228 @@
 
 # GFX1250: v_pk_max3_u16 v10, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x0a,0x00,0x30,0xcc,0x01,0x05,0x0e,0x0c]
 0x0a,0x00,0x30,0xcc,0x01,0x05,0x0e,0x0c
+
+# GFX1250: v_pk_add_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x23,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+0xff,0xd3,0x23,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_add_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x23,0xcc,0xc1,0xfe,0x00,0x20]
+0x05,0x59,0x23,0xcc,0xc1,0xfe,0x00,0x20
+
+# GFX1250: v_pk_add_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x23,0xcc,0xf0,0xfa,0x00,0x58]
+0x05,0x42,0x23,0xcc,0xf0,0xfa,0x00,0x58
+
+# GFX1250: v_pk_add_bf16 v5, exec_hi, null         ; encoding: [0x05,0x40,0x23,0xcc,0x7f,0xf8,0x00,0x18]
+0x05,0x40,0x23,0xcc,0x7f,0xf8,0x00,0x18
+
+# GFX1250: v_pk_add_bf16 v5, exec_lo, -1           ; encoding: [0x05,0x40,0x23,0xcc,0x7e,0x82,0x01,0x18]
+0x05,0x40,0x23,0xcc,0x7e,0x82,0x01,0x18
+
+# GFX1250: v_pk_add_bf16 v5, m0, 0.5               ; encoding: [0x05,0x40,0x23,0xcc,0x7d,0xe0,0x01,0x18]
+0x05,0x40,0x23,0xcc,0x7d,0xe0,0x01,0x18
+
+# GFX1250: v_pk_add_bf16 v5, null, exec_lo         ; encoding: [0x05,0x40,0x23,0xcc,0x7c,0xfc,0x00,0x18]
+0x05,0x40,0x23,0xcc,0x7c,0xfc,0x00,0x18
+
+# GFX1250: v_pk_add_bf16 v5, s1, s2                ; encoding: [0x05,0x40,0x23,0xcc,0x01,0x04,0x00,0x18]
+0x05,0x40,0x23,0xcc,0x01,0x04,0x00,0x18
+
+# GFX1250: v_pk_add_bf16 v5, s105, s105            ; encoding: [0x05,0x40,0x23,0xcc,0x69,0xd2,0x00,0x18]
+0x05,0x40,0x23,0xcc,0x69,0xd2,0x00,0x18
+
+# GFX1250: v_pk_add_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x23,0xcc,0xfd,0xd4,0x00,0x10]
+0x05,0x48,0x23,0xcc,0xfd,0xd4,0x00,0x10
+
+# GFX1250: v_pk_add_bf16 v5, ttmp15, src_scc       ; encoding: [0x05,0x40,0x23,0xcc,0x7b,0xfa,0x01,0x18]
+0x05,0x40,0x23,0xcc,0x7b,0xfa,0x01,0x18
+
+# GFX1250: v_pk_add_bf16 v5, v1, v2                ; encoding: [0x05,0x40,0x23,0xcc,0x01,0x05,0x02,0x18]
+0x05,0x40,0x23,0xcc,0x01,0x05,0x02,0x18
+
+# GFX1250: v_pk_add_bf16 v5, v255, v255            ; encoding: [0x05,0x40,0x23,0xcc,0xff,0xff,0x03,0x18]
+0x05,0x40,0x23,0xcc,0xff,0xff,0x03,0x18
+
+# GFX1250: v_pk_add_bf16 v5, vcc_hi, 0xfe0b        ; encoding: [0x05,0x40,0x23,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+0x05,0x40,0x23,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_add_bf16 v5, vcc_lo, ttmp15        ; encoding: [0x05,0x40,0x23,0xcc,0x6a,0xf6,0x00,0x18]
+0x05,0x40,0x23,0xcc,0x6a,0xf6,0x00,0x18
+
+# GFX1250: v_pk_mul_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x2a,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+0xff,0xd3,0x2a,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_mul_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x2a,0xcc,0xc1,0xfe,0x00,0x20]
+0x05,0x59,0x2a,0xcc,0xc1,0xfe,0x00,0x20
+
+# GFX1250: v_pk_mul_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x2a,0xcc,0xf0,0xfa,0x00,0x58]
+0x05,0x42,0x2a,0xcc,0xf0,0xfa,0x00,0x58
+
+# GFX1250: v_pk_mul_bf16 v5, exec_hi, null         ; encoding: [0x05,0x40,0x2a,0xcc,0x7f,0xf8,0x00,0x18]
+0x05,0x40,0x2a,0xcc,0x7f,0xf8,0x00,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, exec_lo, -1           ; encoding: [0x05,0x40,0x2a,0xcc,0x7e,0x82,0x01,0x18]
+0x05,0x40,0x2a,0xcc,0x7e,0x82,0x01,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, m0, 0.5               ; encoding: [0x05,0x40,0x2a,0xcc,0x7d,0xe0,0x01,0x18]
+0x05,0x40,0x2a,0xcc,0x7d,0xe0,0x01,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, null, exec_lo         ; encoding: [0x05,0x40,0x2a,0xcc,0x7c,0xfc,0x00,0x18]
+0x05,0x40,0x2a,0xcc,0x7c,0xfc,0x00,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, s1, s2                ; encoding: [0x05,0x40,0x2a,0xcc,0x01,0x04,0x00,0x18]
+0x05,0x40,0x2a,0xcc,0x01,0x04,0x00,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, s105, s105            ; encoding: [0x05,0x40,0x2a,0xcc,0x69,0xd2,0x00,0x18]
+0x05,0x40,0x2a,0xcc,0x69,0xd2,0x00,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x2a,0xcc,0xfd,0xd4,0x00,0x10]
+0x05,0x48,0x2a,0xcc,0xfd,0xd4,0x00,0x10
+
+# GFX1250: v_pk_mul_bf16 v5, ttmp15, src_scc       ; encoding: [0x05,0x40,0x2a,0xcc,0x7b,0xfa,0x01,0x18]
+0x05,0x40,0x2a,0xcc,0x7b,0xfa,0x01,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, v1, v2                ; encoding: [0x05,0x40,0x2a,0xcc,0x01,0x05,0x02,0x18]
+0x05,0x40,0x2a,0xcc,0x01,0x05,0x02,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, v255, v255            ; encoding: [0x05,0x40,0x2a,0xcc,0xff,0xff,0x03,0x18]
+0x05,0x40,0x2a,0xcc,0xff,0xff,0x03,0x18
+
+# GFX1250: v_pk_mul_bf16 v5, vcc_hi, 0xfe0b        ; encoding: [0x05,0x40,0x2a,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+0x05,0x40,0x2a,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_mul_bf16 v5, vcc_lo, ttmp15        ; encoding: [0x05,0x40,0x2a,0xcc,0x6a,0xf6,0x00,0x18]
+0x05,0x40,0x2a,0xcc,0x6a,0xf6,0x00,0x18
+
+# GFX1250: v_pk_max_num_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x2c,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+0xff,0xd3,0x2c,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_max_num_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x2c,0xcc,0xc1,0xfe,0x00,0x20]
+0x05,0x59,0x2c,0xcc,0xc1,0xfe,0x00,0x20
+
+# GFX1250: v_pk_max_num_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x2c,0xcc,0xf0,0xfa,0x00,0x58]
+0x05,0x42,0x2c,0xcc,0xf0,0xfa,0x00,0x58
+
+# GFX1250: v_pk_max_num_bf16 v5, exec_hi, null     ; encoding: [0x05,0x40,0x2c,0xcc,0x7f,0xf8,0x00,0x18]
+0x05,0x40,0x2c,0xcc,0x7f,0xf8,0x00,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, exec_lo, -1       ; encoding: [0x05,0x40,0x2c,0xcc,0x7e,0x82,0x01,0x18]
+0x05,0x40,0x2c,0xcc,0x7e,0x82,0x01,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, m0, 0.5           ; encoding: [0x05,0x40,0x2c,0xcc,0x7d,0xe0,0x01,0x18]
+0x05,0x40,0x2c,0xcc,0x7d,0xe0,0x01,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, null, exec_lo     ; encoding: [0x05,0x40,0x2c,0xcc,0x7c,0xfc,0x00,0x18]
+0x05,0x40,0x2c,0xcc,0x7c,0xfc,0x00,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, s1, s2            ; encoding: [0x05,0x40,0x2c,0xcc,0x01,0x04,0x00,0x18]
+0x05,0x40,0x2c,0xcc,0x01,0x04,0x00,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, s105, s105        ; encoding: [0x05,0x40,0x2c,0xcc,0x69,0xd2,0x00,0x18]
+0x05,0x40,0x2c,0xcc,0x69,0xd2,0x00,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x2c,0xcc,0xfd,0xd4,0x00,0x10]
+0x05,0x48,0x2c,0xcc,0xfd,0xd4,0x00,0x10
+
+# GFX1250: v_pk_max_num_bf16 v5, ttmp15, src_scc   ; encoding: [0x05,0x40,0x2c,0xcc,0x7b,0xfa,0x01,0x18]
+0x05,0x40,0x2c,0xcc,0x7b,0xfa,0x01,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, v1, v2            ; encoding: [0x05,0x40,0x2c,0xcc,0x01,0x05,0x02,0x18]
+0x05,0x40,0x2c,0xcc,0x01,0x05,0x02,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, v255, v255        ; encoding: [0x05,0x40,0x2c,0xcc,0xff,0xff,0x03,0x18]
+0x05,0x40,0x2c,0xcc,0xff,0xff,0x03,0x18
+
+# GFX1250: v_pk_max_num_bf16 v5, vcc_hi, 0xfe0b    ; encoding: [0x05,0x40,0x2c,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+0x05,0x40,0x2c,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_max_num_bf16 v5, vcc_lo, ttmp15    ; encoding: [0x05,0x40,0x2c,0xcc,0x6a,0xf6,0x00,0x18]
+0x05,0x40,0x2c,0xcc,0x6a,0xf6,0x00,0x18
+
+# GFX1250: v_pk_min_num_bf16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x2b,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00]
+0xff,0xd3,0x2b,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_min_num_bf16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x2b,0xcc,0xc1,0xfe,0x00,0x20]
+0x05,0x59,0x2b,0xcc,0xc1,0xfe,0x00,0x20
+
+# GFX1250: v_pk_min_num_bf16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x2b,0xcc,0xf0,0xfa,0x00,0x58]
+0x05,0x42,0x2b,0xcc,0xf0,0xfa,0x00,0x58
+
+# GFX1250: v_pk_min_num_bf16 v5, exec_hi, null     ; encoding: [0x05,0x40,0x2b,0xcc,0x7f,0xf8,0x00,0x18]
+0x05,0x40,0x2b,0xcc,0x7f,0xf8,0x00,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, exec_lo, -1       ; encoding: [0x05,0x40,0x2b,0xcc,0x7e,0x82,0x01,0x18]
+0x05,0x40,0x2b,0xcc,0x7e,0x82,0x01,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, m0, 0.5           ; encoding: [0x05,0x40,0x2b,0xcc,0x7d,0xe0,0x01,0x18]
+0x05,0x40,0x2b,0xcc,0x7d,0xe0,0x01,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, null, exec_lo     ; encoding: [0x05,0x40,0x2b,0xcc,0x7c,0xfc,0x00,0x18]
+0x05,0x40,0x2b,0xcc,0x7c,0xfc,0x00,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, s1, s2            ; encoding: [0x05,0x40,0x2b,0xcc,0x01,0x04,0x00,0x18]
+0x05,0x40,0x2b,0xcc,0x01,0x04,0x00,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, s105, s105        ; encoding: [0x05,0x40,0x2b,0xcc,0x69,0xd2,0x00,0x18]
+0x05,0x40,0x2b,0xcc,0x69,0xd2,0x00,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x2b,0xcc,0xfd,0xd4,0x00,0x10]
+0x05,0x48,0x2b,0xcc,0xfd,0xd4,0x00,0x10
+
+# GFX1250: v_pk_min_num_bf16 v5, ttmp15, src_scc   ; encoding: [0x05,0x40,0x2b,0xcc,0x7b,0xfa,0x01,0x18]
+0x05,0x40,0x2b,0xcc,0x7b,0xfa,0x01,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, v1, v2            ; encoding: [0x05,0x40,0x2b,0xcc,0x01,0x05,0x02,0x18]
+0x05,0x40,0x2b,0xcc,0x01,0x05,0x02,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, v255, v255        ; encoding: [0x05,0x40,0x2b,0xcc,0xff,0xff,0x03,0x18]
+0x05,0x40,0x2b,0xcc,0xff,0xff,0x03,0x18
+
+# GFX1250: v_pk_min_num_bf16 v5, vcc_hi, 0xfe0b    ; encoding: [0x05,0x40,0x2b,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00]
+0x05,0x40,0x2b,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_min_num_bf16 v5, vcc_lo, ttmp15    ; encoding: [0x05,0x40,0x2b,0xcc,0x6a,0xf6,0x00,0x18]
+0x05,0x40,0x2b,0xcc,0x6a,0xf6,0x00,0x18
+
+# GFX1250: v_pk_fma_bf16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[1,1,1] neg_hi:[1,1,1] clamp ; encoding: [0xff,0xa7,0x11,0xcc,0xff,0xd6,0xf0,0xf9,0x0b,0xfe,0x00,0x00]
+0xff,0xa7,0x11,0xcc,0xff,0xd6,0xf0,0xf9,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_fma_bf16 v5, -1, exec_hi, src_scc neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x05,0x42,0x11,0xcc,0xc1,0xfe,0xf4,0x5b]
+0x05,0x42,0x11,0xcc,0xc1,0xfe,0xf4,0x5b
+
+# GFX1250: v_pk_fma_bf16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x05,0x4c,0x11,0xcc,0xf0,0xfa,0xc0,0x93]
+0x05,0x4c,0x11,0xcc,0xf0,0xfa,0xc0,0x93
+
+# GFX1250: v_pk_fma_bf16 v5, exec_hi, null, vcc_lo op_sel_hi:[0,1,0] ; encoding: [0x05,0x00,0x11,0xcc,0x7f,0xf8,0xa8,0x11]
+0x05,0x00,0x11,0xcc,0x7f,0xf8,0xa8,0x11
+
+# GFX1250: v_pk_fma_bf16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1] ; encoding: [0x05,0x40,0x11,0xcc,0x7e,0x82,0xad,0x01]
+0x05,0x40,0x11,0xcc,0x7e,0x82,0xad,0x01
+
+# GFX1250: v_pk_fma_bf16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x11,0xcc,0x7d,0xe0,0xf5,0x01]
+0x05,0x00,0x11,0xcc,0x7d,0xe0,0xf5,0x01
+
+# GFX1250: v_pk_fma_bf16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1] op_sel_hi:[1,0,0] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x05,0x39,0x11,0xcc,0x7c,0xfc,0xfc,0x2b,0x0b,0xfe,0x00,0x00]
+0x05,0x39,0x11,0xcc,0x7c,0xfc,0xfc,0x2b,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_fma_bf16 v5, s1, v255, exec_hi     ; encoding: [0x05,0x40,0x11,0xcc,0x01,0xfe,0xff,0x19]
+0x05,0x40,0x11,0xcc,0x01,0xfe,0xff,0x19
+
+# GFX1250: v_pk_fma_bf16 v5, s105, s105, exec_lo   ; encoding: [0x05,0x40,0x11,0xcc,0x69,0xd2,0xf8,0x19]
+0x05,0x40,0x11,0xcc,0x69,0xd2,0xf8,0x19
+
+# GFX1250: v_pk_fma_bf16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x11,0xcc,0xfd,0xd4,0x04,0x0b]
+0x05,0x50,0x11,0xcc,0xfd,0xd4,0x04,0x0b
+
+# GFX1250: v_pk_fma_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x11,0xcc,0x7b,0xfa,0xed,0x19]
+0x05,0x40,0x11,0xcc,0x7b,0xfa,0xed,0x19
+
+# GFX1250: v_pk_fma_bf16 v5, v1, v2, s3            ; encoding: [0x05,0x40,0x11,0xcc,0x01,0x05,0x0e,0x18]
+0x05,0x40,0x11,0xcc,0x01,0x05,0x0e,0x18
+
+# GFX1250: v_pk_fma_bf16 v5, v255, s2, s105        ; encoding: [0x05,0x40,0x11,0xcc,0xff,0x05,0xa4,0x19]
+0x05,0x40,0x11,0xcc,0xff,0x05,0xa4,0x19
+
+# GFX1250: v_pk_fma_bf16 v5, vcc_hi, 0xfe0b, v255  ; encoding: [0x05,0x40,0x11,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00]
+0x05,0x40,0x11,0xcc,0x6b,0xfe,0xfd,0x1f,0x0b,0xfe,0x00,0x00
+
+# GFX1250: v_pk_fma_bf16 v5, vcc_lo, ttmp15, v3    ; encoding: [0x05,0x40,0x11,0xcc,0x6a,0xf6,0x0c,0x1c]
+0x05,0x40,0x11,0xcc,0x6a,0xf6,0x0c,0x1c


        


More information about the llvm-commits mailing list