[llvm] [AARCH64] Add assembly/disassembly for FMMLA instructions (PR #113313)

Tue Oct 22 06:33:23 PDT 2024

https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/113313

This patch adds assembly/disassembly for the following instructions:
FMMLA (widening, FP16 to FP32)
FMMLA (widening, FP8 to FP16)
FMMLA (widening, FP8 to FP32)

The instructions are implemented according to the specification in [1].

[1] https://developer.arm.com/documentation/ddi0602

>From a9b15e3bd974f16c0d0ef9f40f8f227e3990e454 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 22 Oct 2024 13:10:20 +0000
Subject: [PATCH] [AARCH64] Add assembly/disassembly for FMMLA instructions

---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 18 +++++-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 62 +++++++++----------
 .../SVE/matrix-multiply-fp-diagnostics.s      |  5 --
 .../MC/AArch64/SVE2/directive-arch-negative.s | 18 ++++++
 llvm/test/MC/AArch64/SVE2/directive-arch.s    | 12 ++++
 .../SVE2/directive-arch_extension-negative.s  | 20 ++++++
 .../AArch64/SVE2/directive-arch_extension.s   | 14 +++++
 .../MC/AArch64/SVE2/directive-cpu-negative.s  | 18 ++++++
 llvm/test/MC/AArch64/SVE2/directive-cpu.s     | 12 ++++
 .../AArch64/SVE2/fmmla-f16f32mm-diagnostics.s | 18 ++++++
 llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s    | 41 ++++++++++++
 .../AArch64/SVE2/fmmla-f8f16mm-diagnostics.s  | 24 +++++++
 llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s     | 39 ++++++++++++
 .../AArch64/SVE2/fmmla-f8f32mm-diagnostics.s  | 30 +++++++++
 llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s     | 39 ++++++++++++
 15 files changed, 330 insertions(+), 40 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2a857234c7d745..a576c18357bc60 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2427,7 +2427,7 @@ let Predicates = [HasBF16, HasSVEorSME] in {
 } // End HasBF16, HasSVEorSME
 
 let Predicates = [HasBF16, HasSVE] in {
-  defm BFMMLA_ZZZ   : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
+  defm BFMMLA_ZZZ   : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
 } // End HasBF16, HasSVE
 
 let Predicates = [HasBF16, HasSVEorSME] in {
@@ -3449,11 +3449,15 @@ let Predicates = [HasSVEorSME, HasMatMulInt8] in {
 } // End HasSVEorSME, HasMatMulInt8
 
 let Predicates = [HasSVE, HasMatMulFP32] in {
-  defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
+  defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
 } // End HasSVE, HasMatMulFP32
 
+let Predicates = [HasSVE_F16F32MM] in {
+  def FMLLA_ZZZ_SH : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>;
+} // End HasSVE_F16F32MM
+
 let Predicates = [HasSVE, HasMatMulFP64] in {
-  defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
+  defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
   defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8,  nxv16i8, nxv16i1, AArch64ld1ro_z>;
   defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1,  AArch64ld1ro_z>;
   defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1,  AArch64ld1ro_z>;
@@ -4245,6 +4249,14 @@ def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">;
 def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">;
 } // End HasSSVE_FP8FMA
 
+let Predicates = [HasSVE2, HasF8F32MM] in {
+  def FMMLA_ZZZ_BtoS :  sve2_fp8_mmla<0b0, ZPR32, "fmmla">;
+}
+
+let Predicates = [HasSVE2, HasF8F16MM] in {
+  def FMMLA_ZZZ_BtoH :  sve2_fp8_mmla<0b1, ZPR16, "fmmla">;
+}
+
 let Predicates = [HasSSVE_FP8DOT2] in {
 // FP8 Widening Dot-Product - Indexed Group
 defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed_h<"fdot">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f655526fa81cfe..df3b6c96f31823 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -8767,30 +8767,6 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
   def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
-class sve_bfloat_matmul<string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
-  asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
-  bits<5> Zm;
-  bits<5> Zda;
-  bits<5> Zn;
-  let Inst{31-21} = 0b01100100011;
-  let Inst{20-16} = Zm;
-  let Inst{15-10} = 0b111001;
-  let Inst{9-5}   = Zn;
-  let Inst{4-0}   = Zda;
-
-  let Constraints = "$Zda = $_Zda";
-  let DestructiveInstType = DestructiveOther;
-  let ElementSize = ElementSizeH;
-  let hasSideEffects = 0;
-  let mayRaiseFPException = 1;
-}
-
-multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
-  def NAME : sve_bfloat_matmul<asm>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
-}
-
 class sve_bfloat_convert<bit N, string asm>
 : I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
   asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
@@ -8913,14 +8889,14 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
 // SVE Floating Point Matrix Multiply Accumulate Group
 //===----------------------------------------------------------------------===//
 
-class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm),
+class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
+: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm),
     asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
   bits<5> Zm;
-  let Inst{31-23} = 0b011001001;
-  let Inst{22}    = sz;
+  let Inst{31-24} = 0b01100100;
+  let Inst{23-22} = opc;
   let Inst{21}    = 1;
   let Inst{20-16} = Zm;
   let Inst{15-10} = 0b111001;
@@ -8929,15 +8905,15 @@ class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
-  let ElementSize = zprty.ElementSize;
+  let ElementSize = reg_ty.ElementSize;
   let hasSideEffects = 0;
   let mayRaiseFPException = 1;
 }
 
-multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
-  def NAME : sve_fp_matrix_mla<sz, asm, zprty>;
+multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> {
+  def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>;
 
-  def : SVE_3_Op_Pat<vt, op , vt, vt, vt, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -10357,6 +10333,28 @@ class sve2_fp8_mla_long_long_by_indexed_elem<bits<2> TT, string mnemonic>
   let Uses = [FPMR, FPCR];
 }
 
+// FP8 Matrix Multiply-accumulate Group
+class sve2_fp8_mmla<bit opc, ZPRRegOp dst_ty, string mnemonic>
+    : I<(outs dst_ty:$Zda),
+      (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR8:$Zm),
+      mnemonic, "\t$Zda, $Zn, $Zm",
+      "", []>, Sched<[]>{
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-23} = 0b011001000;
+  let Inst{22}    = opc;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b111000;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = DestructiveOther;
+  let ElementSize         = dst_ty.ElementSize;
+  let Uses = [FPMR, FPCR];
+}
+
 class sve_fp8_dot_indexed<bits<4> opc, ZPRRegOp dst_ty, Operand iop_ty, string mnemonic>
 : I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, iop_ty:$iop),
     mnemonic, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s
index 8ae4d499284423..2fe43f7aa8444c 100644
--- a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s
@@ -3,11 +3,6 @@
 // --------------------------------------------------------------------------//
 // FMMLA (SVE)
 
-// Invalid element size
-
-fmmla z0.h, z1.h, z2.h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
-
 // Mis-matched element size
 
 fmmla z0.d, z1.s, z2.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
index 767e5dc5a1513b..fda59ebff721ab 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
@@ -29,3 +29,21 @@ rax1 z0.d, z0.d, z0.d
 bgrp z21.s, z10.s, z21.s
 // CHECK: error: instruction requires: sve2-bitperm
 // CHECK-NEXT: bgrp z21.s, z10.s, z21.s
+
+.arch armv9-a+sve2+f8f16mm
+.arch armv9-a+sve2+nof8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z23.h, z13.b, z8.b
+
+.arch armv9-a+sve2+f8f32mm
+.arch armv9-a+sve2+nof8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.b, z8.b
+
+.arch armv9-a+sve-f16f32mm
+.arch armv9-a+nosve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.h, z8.h
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch.s b/llvm/test/MC/AArch64/SVE2/directive-arch.s
index 0a921ccf5e4f0f..d973471b71ae12 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch.s
@@ -19,3 +19,15 @@ rax1 z0.d, z0.d, z0.d
 .arch armv9-a+sve2-bitperm
 bgrp z21.s, z10.s, z21.s
 // CHECK: bgrp z21.s, z10.s, z21.s
+
+.arch armv9-a+sve2+f8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: fmmla   z23.h, z13.b, z8.b
+
+.arch armv9-a+sve2+f8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: fmmla   z23.s, z13.b, z8.b
+
+.arch armv9-a+sve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: fmmla   z23.s, z13.h, z8.h
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
index 6d90f7f0574900..7d378b4700d180 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
@@ -29,3 +29,23 @@ rax1 z0.d, z0.d, z0.d
 bgrp z21.s, z10.s, z21.s
 // CHECK: error: instruction requires: sve2-bitperm
 // CHECK-NEXT: bgrp z21.s, z10.s, z21.s
+
+.arch_extension sve2 
+.arch_extension f8f16mm
+.arch_extension nof8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z23.h, z13.b, z8.b
+
+.arch_extension sve2 
+.arch_extension f8f32mm
+.arch_extension nof8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.b, z8.b
+
+.arch_extension sve-f16f32mm
+.arch_extension nosve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.h, z8.h
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
index 90f5bec07d5426..e609d307b19035 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
@@ -19,3 +19,17 @@ rax1 z0.d, z0.d, z0.d
 .arch_extension sve2-bitperm
 bgrp z21.s, z10.s, z21.s
 // CHECK: bgrp z21.s, z10.s, z21.s
+
+.arch_extension sve2
+.arch_extension f8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: fmmla   z23.h, z13.b, z8.b
+
+.arch_extension sve2
+.arch_extension f8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: fmmla   z23.s, z13.b, z8.b
+
+.arch_extension sve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: fmmla   z23.s, z13.h, z8.h
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
index ed99aa7f007865..c11d0fea87e81b 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
@@ -29,3 +29,21 @@ rax1 z0.d, z0.d, z0.d
 bgrp z21.s, z10.s, z21.s
 // CHECK: error: instruction requires: sve2-bitperm
 // CHECK-NEXT: bgrp z21.s, z10.s, z21.s
+
+.cpu generic+sve2+f8f16mm
+.cpu generic+sve2+nof8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z23.h, z13.b, z8.b
+
+.cpu generic+sve2+f8f32mm
+.cpu generic+sve2+nof8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.b, z8.b
+
+.cpu generic+sve-f16f32mm
+.cpu generic+nosve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.h, z8.h
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu.s b/llvm/test/MC/AArch64/SVE2/directive-cpu.s
index b3cacc46c1ddce..b67c145971dfa8 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-cpu.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-cpu.s
@@ -19,3 +19,15 @@ rax1 z0.d, z0.d, z0.d
 .cpu generic+sve2-bitperm
 bgrp z21.s, z10.s, z21.s
 // CHECK: bgrp z21.s, z10.s, z21.s
+
+.cpu generic+sve2+f8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: fmmla   z23.h, z13.b, z8.b
+
+.cpu generic+sve2+f8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: fmmla   z23.s, z13.b, z8.b
+
+.cpu generic+sve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: fmmla   z23.s, z13.h, z8.h
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s
new file mode 100644
index 00000000000000..924c123f0ca5bc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s
@@ -0,0 +1,18 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-f16f32mm  2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// FMMLA (SVE)
+
+// Invalid element size
+
+fmmla z0.s, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f32mm
+fmmla z0.d, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+
+// Mis-matched element size
+
+fmmla z0.s, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+fmmla z0.s, z1.d, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s
new file mode 100644
index 00000000000000..c366bb70e2c26f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s
@@ -0,0 +1,41 @@
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve,+sve-f16f32mm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve,+sve-f16f32mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+movprfx z23, z31
+fmmla   z23.s, z13.h, z8.h  // 01100100-00101000-11100101-10110111
+// CHECK-INST:  movprfx z23, z31
+// CHECK-INST: fmmla   z23.s, z13.h, z8.h
+// CHECK-ENCODING: [0xb7,0xe5,0x28,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 6428e5b7 <unknown>
+
+fmmla   z0.s, z0.h, z0.h  // 01100100-00100000-11100100-00000000
+// CHECK-INST: fmmla   z0.s, z0.h, z0.h
+// CHECK-ENCODING: [0x00,0xe4,0x20,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 6420e400 <unknown>
+
+fmmla   z23.s, z13.h, z8.h  // 01100100-00101000-11100101-10110111
+// CHECK-INST: fmmla   z23.s, z13.h, z8.h
+// CHECK-ENCODING: [0xb7,0xe5,0x28,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 6428e5b7 <unknown>
+
+fmmla   z31.s, z31.h, z31.h  // 01100100-00111111-11100111-11111111
+// CHECK-INST: fmmla   z31.s, z31.h, z31.h
+// CHECK-ENCODING: [0xff,0xe7,0x3f,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 643fe7ff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s
new file mode 100644
index 00000000000000..59818d2d24a481
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm   2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmmla   z21.b, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.b, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.d, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.d, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.h, z21.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.h, z21.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.s, z21.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.s, z21.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s
new file mode 100644
index 00000000000000..ff343548993cfe
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2,+f8f16mm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+f8f16mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+movprfx z23, z31
+fmmla   z23.h, z13.b, z8.b  // 01100100-01101000-11100001-10110111
+// CHECK-INST:  movprfx z23, z31
+// CHECK-INST: fmmla   z23.h, z13.b, z8.b
+// CHECK-ENCODING: [0xb7,0xe1,0x68,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 6468e1b7 <unknown>
+
+fmmla   z0.h, z0.b, z0.b  // 01100100-01100000-11100000-00000000
+// CHECK-INST: fmmla   z0.h, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0xe0,0x60,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 6460e000 <unknown>
+
+fmmla   z21.h, z10.b, z21.b  // 01100100-01110101-11100001-01010101
+// CHECK-INST: fmmla   z21.h, z10.b, z21.b
+// CHECK-ENCODING: [0x55,0xe1,0x75,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 6475e155 <unknown>
+
+fmmla   z31.h, z31.b, z31.b  // 01100100-01111111-11100011-11111111
+// CHECK-INST: fmmla   z31.h, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0xe3,0x7f,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 647fe3ff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s
new file mode 100644
index 00000000000000..0b1eb1b24e2643
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s
@@ -0,0 +1,30 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm   2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmmla   z21.b, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.b, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.h, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z21.h, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.d, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.d, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.h, z21.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.h, z21.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.s, z21.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.s, z21.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s
new file mode 100644
index 00000000000000..8b59a112dc61b4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+f8f32mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+movprfx z23, z31
+fmmla   z23.s, z13.b, z8.b  // 01100100-00101000-11100001-10110111
+// CHECK-INST:  movprfx z23, z31
+// CHECK-INST: fmmla   z23.s, z13.b, z8.b
+// CHECK-ENCODING: [0xb7,0xe1,0x28,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 6428e1b7 <unknown>
+
+fmmla   z0.s, z0.b, z0.b  // 01100100-00100000-11100000-00000000
+// CHECK-INST: fmmla   z0.s, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0xe0,0x20,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 6420e000 <unknown>
+
+fmmla   z21.s, z10.b, z21.b  // 01100100-00110101-11100001-01010101
+// CHECK-INST: fmmla   z21.s, z10.b, z21.b
+// CHECK-ENCODING: [0x55,0xe1,0x35,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 6435e155 <unknown>
+
+fmmla   z31.s, z31.b, z31.b  // 01100100-00111111-11100011-11111111
+// CHECK-INST: fmmla   z31.s, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0xe3,0x3f,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 643fe3ff <unknown>