[llvm] [llvm][AArch64][Assembly]: Add SME_F8F16 and SME_F8F32 Ass/Disass. (PR #70640)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 10:12:00 PDT 2023


https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/70640

>From 54f3852b09e655264669bd1f9dc7c24fcc4af4b1 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Wed, 27 Sep 2023 14:50:29 +0000
Subject: [PATCH] [llvm][AArch64][Assembly]: Add SME_F8F16 and SME_F8F32
 Ass/Disass.

This patch adds the feature flags for SME_F8F16 and SME_F8F32,
and the assembly/disassembly for the following SME2 instructions:
  * SME:
    - FDOT
    - FMLAL
    - FMLALL
    - FVDOT
    - FVDOTB
    - FVDOTT
    - FMOPA
Change-Id: Ib5d5c675651633eac8bb382d674c0d7735e1ee5c
---
 .../llvm/TargetParser/AArch64TargetParser.h   |   4 +
 llvm/lib/Target/AArch64/AArch64.td            |   6 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   4 +
 .../lib/Target/AArch64/AArch64RegisterInfo.td |   4 +
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  54 +++
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |  13 +-
 .../MCTargetDesc/AArch64InstPrinter.cpp       |   4 +
 llvm/lib/Target/AArch64/SMEInstrFormats.td    | 126 +++++-
 .../MC/AArch64/FP8_SME2/dot-diagnostics.s     | 215 +++++++++++
 llvm/test/MC/AArch64/FP8_SME2/dot.s           | 361 ++++++++++++++++++
 .../MC/AArch64/FP8_SME2/mla-diagnostics.s     | 195 ++++++++++
 llvm/test/MC/AArch64/FP8_SME2/mla.s           | 361 ++++++++++++++++++
 .../MC/AArch64/FP8_SME2/mopa-diagnostics.s    |  46 +++
 llvm/test/MC/AArch64/FP8_SME2/mopa.s          |  39 ++
 llvm/test/MC/AArch64/SME2/fmlal-diagnostics.s |   4 +-
 llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s |   4 +-
 .../TargetParser/TargetParserTest.cpp         |   7 +-
 17 files changed, 1433 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/dot-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/dot.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/mla-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/mla.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/mopa-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/mopa.s

diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 80e3fe76e5c2527..59db32ad7ee4381 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -168,6 +168,8 @@ enum ArchExtKind : unsigned {
   AEK_SSVE_FP8DOT2 =  64, // FEAT_SSVE_FP8DOT2
   AEK_FP8DOT4 =       65, // FEAT_FP8DOT4
   AEK_SSVE_FP8DOT4 =  66, // FEAT_SSVE_FP8DOT4
+  AEK_SMEF8F16 =      67, // FEAT_SME_F8F16
+  AEK_SMEF8F32 =      68, // FEAT_SME_F8F32
   AEK_NUM_EXTENSIONS
 };
 using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -285,6 +287,8 @@ inline constexpr ExtensionInfo Extensions[] = {
     {"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+sme2", 0},
     {"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "", 0},
     {"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0},
+    {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme2,+fp8", 0},
+    {"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0},
     // Special cases
     {"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
 };
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index aa0efb3e6ec13d5..00fe88bcdab0a3f 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -535,6 +535,12 @@ def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",
 def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",
   "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;
 
+def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
+  "Enable Scalable Matrix Extension (SME) F8F16 instructions (FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;
+
+def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",
+  "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>;
+
 def FeatureAppleA7SysReg  : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
   "Apple A7 (the CPU formerly known as Cyclone)">;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0125d3dbecf96cb..af2daed729c0b04 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -187,6 +187,10 @@ def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                  AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                            (all_of FeatureSVE2, FeatureFP8DOT4)),
                                  "ssve-fp8dot4 or (sve2 and fp8dot4)">;
+def HasSMEF8F16     : Predicate<"Subtarget->hasSMEF8F16()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
+def HasSMEF8F32     : Predicate<"Subtarget->hasSMEF8F32()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
 
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 497c143584d48d4..b70ab8568884784 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1268,6 +1268,10 @@ class ZPRVectorListMul<int ElementWidth, int NumRegs> : ZPRVectorList<ElementWid
 
 let EncoderMethod = "EncodeRegAsMultipleOf<2>",
     DecoderMethod = "DecodeZPR2Mul2RegisterClass" in {
+  def ZZ_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,0>"> {
+    let ParserMatchClass = ZPRVectorListMul<0, 2>;
+  }
+
   def ZZ_b_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,'b'>"> {
     let ParserMatchClass = ZPRVectorListMul<8, 2>;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index f55b84b02f85162..1c77d15ba17b232 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -885,3 +885,57 @@ defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
 defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
 defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
 } //[HasSME2, HasFAMINMAX]
+
+let Predicates = [HasSME2, HasSMEF8F16] in {
+defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
+
+defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_16b<"fdot",    0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
+defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_16b<"fdot",    0b100, ZZZZ_b_mul_r, ZPR4b8>;
+defm FDOT_VG2_M2ZZ_BtoH  :  sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
+defm FDOT_VG4_M4ZZ_BtoH  :  sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FDOT_VG2_M2Z2Z_BtoH : sme2_dot_mla_add_sub_array_vg2_multi<"fdot",    0b0100100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FDOT_VG4_M4Z4Z_BtoH : sme2_dot_mla_add_sub_array_vg4_multi<"fdot",    0b0100100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+def  FMLAL_MZZI_BtoH      : sme2_mla_ll_array_index_16b<"fmlal", 0b11, 0b00>;
+defm FMLAL_VG2_M2ZZI_BtoH : sme2_multi_vec_array_vg2_index_16b<"fmlal", 0b10, 0b111>;
+defm FMLAL_VG4_M4ZZI_BtoH : sme2_multi_vec_array_vg4_index_16b<"fmlal", 0b10, 0b110>;
+def  FMLAL_VG2_MZZ_BtoH   : sme2_mla_long_array_single_16b<"fmlal">;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FMLAL_VG2_M2ZZ_BtoH  : sme2_fp_mla_long_array_vg2_single<"fmlal",  0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, null_frag>;
+defm FMLAL_VG4_M4ZZ_BtoH  :  sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, null_frag>;
+defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal",   0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal",   0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_fp16<"fmopa", 0b1, 0b0, 0b01, ZPR8>;
+
+} //[HasSME2, HasSMEF8F16]
+
+let Predicates = [HasSME2, HasSMEF8F32] in {
+// TODO : Replace nxv16i8 by nxv16f8
+defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
+defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
+defm FDOT_VG2_M2ZZ_BtoS  : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
+defm FDOT_VG4_M4ZZ_BtoS  : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;
+// TODO : Replace nxv16i8 by nxv16f8
+defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot",   0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot",   0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
+def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;
+
+defm FMLALL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"fmlall",     0b01, 0b000, null_frag>;
+defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
+defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, null_frag>;
+// TODO: Replace nxv16i8 by nxv16f8
+defm FMLALL_MZZ_BtoS       : sme2_mla_ll_array_single<"fmlall",      0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, null_frag>;
+defm FMLALL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg24_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8>;
+defm FMLALL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg24_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8>;
+defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall",   0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
+defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall",   0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+
+
+defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_frag>;
+
+} //[HasSME2, HasSMEF8F32]
+
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 9c16d22677b9e09..65e1b79ede3530d 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3658,6 +3658,8 @@ static const struct Extension {
     {"ssve-fp8dot2", {AArch64::FeatureSSVE_FP8DOT2}},
     {"fp8dot4", {AArch64::FeatureFP8DOT4}},
     {"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}},
+    {"sme-f8f16", {AArch64::FeatureSMEF8F16}},
+    {"sme-f8f32", {AArch64::FeatureSMEF8F32}},
 };
 
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -4553,7 +4555,7 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
 
   Operands.push_back(AArch64Operand::CreateReg(
       RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext()));
-  Lex(); // Eat identifier token.
+  Lex(); // eat register
 
   // Check if register is followed by an index
   if (parseOptionalToken(AsmToken::LBrac)) {
@@ -4565,16 +4567,17 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
     const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
     if (!MCE)
       return TokError("immediate value expected for vector index");
-    if (parseToken(AsmToken::RBrac, "']' expected"))
-      return ParseStatus::Failure;
-
     Operands.push_back(AArch64Operand::CreateImm(
         MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc,
         getLoc(), getContext()));
+    if (parseOptionalToken(AsmToken::Comma))
+      if (parseOptionalMulOperand(Operands))
+        return ParseStatus::Failure;
+    if (parseToken(AsmToken::RBrac, "']' expected"))
+      return ParseStatus::Failure;
     Operands.push_back(
         AArch64Operand::CreateToken("]", getLoc(), getContext()));
   }
-
   return ParseStatus::Success;
 }
 
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 988c78699179f0c..c5de5b4de4aef3a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1740,6 +1740,10 @@ template <unsigned NumLanes, char LaneKind>
 void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
                                               const MCSubtargetInfo &STI,
                                               raw_ostream &O) {
+  if (LaneKind == 0) {
+    printVectorList(MI, OpNum, STI, O, "");
+    return;
+  }
   std::string Suffix(".");
   if (NumLanes)
     Suffix += itostr(NumLanes) + LaneKind;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index d8b44c68fbdee10..e2c9dbc1a156128 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1922,6 +1922,17 @@ multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op,
   def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
 }
 
+class sme2_mla_long_array_single_16b<string mnemonic>
+    : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8,  mnemonic> {
+    bits<4> Zm;
+    bits<5> Zn;
+    bits<3> imm;
+    let Inst{20}    = 0b1;
+    let Inst{19-16} = Zm;
+    let Inst{9-5}   = Zn;
+    let Inst{2-0}   = imm;
+}
+
 class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
                                       MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
                                       ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
@@ -1937,7 +1948,6 @@ class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
   let Inst{1-0}   = imm;
 }
 
-	
 multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                              RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
                                              ValueType zpr_ty, SDPatternOperator intrinsic> {
@@ -1971,7 +1981,8 @@ multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, Matrix
                                              RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
                                              ValueType zpr_ty, SDPatternOperator intrinsic> {
   def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, 
-                                             vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1>;
+                                             vector_ty, mnemonic, "vgx4">,
+                                             SMEPseudo2Instr<NAME, 1>;
 
   def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
                                                       SMEMatrixArray>;
@@ -2390,7 +2401,6 @@ multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
 
 //===----------------------------------------------------------------------===//
 // SME2 Dot Products and MLA
-
 class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
                                      RegisterOperand multi_vector_ty,
                                      ZPRRegOp vector_ty, Operand index_ty,
@@ -2428,7 +2438,6 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
     bits<2> i;
     let Inst{11-10} = i;
   }
-
   def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
 
   def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
@@ -2439,6 +2448,7 @@ multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<
 }
 
 // SME2.1 multi-vec ternary indexed two registers 16-bit
+// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
 multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
                                                 RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
   def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
@@ -2448,11 +2458,24 @@ multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bit
     let Inst{11-10} = i{2-1};
     let Inst{3}     = i{0};
   }
+
   def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
         (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
         multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
 }
 
+// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
+// two registers
+class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
+   : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
+                                    ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {
+
+  bits<2> i;
+  let Inst{10} = i{1};
+  let Inst{3}  = i{0};
+  let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
+}
+
 // SME2 multi-vec ternary indexed two registers 64-bit
 
 class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
@@ -2608,7 +2631,83 @@ multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
         (!cast<Instruction>(NAME) MatrixOp64:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
         multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
 }
+
+// FMLAL (multiple and indexed vector, FP8 to FP16)
+class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
+                                          RegisterOperand multi_vector_ty, string mnemonic>
+    : I<(outs MatrixOp16:$ZAda),
+        (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
+         multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
+         mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
+         "", []>, Sched<[]> {
+  bits<4> Zm;
+  bits<2> Rv;
+  bits<4> i;
+  bits<2> imm2;
+  let Inst{31-24} = 0b11000001;
+  let Inst{23-22} = sz;
+  let Inst{21-20} = 0b01;
+  let Inst{19-16} = Zm;
+  let Inst{15}    = vg4;
+  let Inst{14-13} = Rv;
+  let Inst{12}    = op{2};
+  let Inst{11-10} = i{3-2};
+  let Inst{5-4}   = op{1-0};
+  let Inst{3-2}   = i{1-0};
+  let Inst{1-0}   = imm2;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3>op> {
+  def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r, mnemonic> {
+    bits<4> Zn;
+    let Inst{9-6} = Zn;
+ }
+ def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
+                 (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
+                  uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
+}
+
+multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2>sz, bits<3>op> {
+  def NAME: sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r, mnemonic> {
+    bits<3> Zn;
+    let Inst{9-7} = Zn;
+    let Inst{6}   = 0b0;
+  }
+ def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
+                 (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
+                  uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
+}
+
 //===----------------------------------------------------------------------===//
+// SME2 multi-vec indexed long long MLA one source 16-bit
+class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz,bits<2> op>
+    : I<(outs MatrixOp16:$ZAda),
+        (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
+        mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
+        "", []>, Sched<[]> {
+  bits<4> Zm;
+  bits<2> Rv;
+  bits<4> i;
+  bits<5> Zn;
+  bits<3> imm3;
+  let Inst{31-24} = 0b11000001;
+  let Inst{23-22} = sz;
+  let Inst{21-20} = 0b00;
+  let Inst{19-16} = Zm;
+  let Inst{15}    = i{3};
+  let Inst{14-13} = Rv;
+  let Inst{12}    = op{1};
+  let Inst{11-10} = i{2-1};
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = op{0};
+  let Inst{3}     = i{0};
+  let Inst{2-0}   = imm3;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
 // SME2 multi-vec indexed long long MLA one source 32-bit
 class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
     : I<(outs MatrixOp32:$ZAda),
@@ -3059,6 +3158,25 @@ class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
   let Inst{4-0}   = Rt;
 }
 
+// SME2 move vector to lookup table
+class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
+   : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
+        mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
+        "", []>, Sched<[]> {
+  bits<5> Zt;
+  bits<2> off2;
+  let Inst{31-14} = 0b110000000100111100;
+  let Inst{13-12} = off2;
+  let Inst{11-5}  = opc;
+  let Inst{4-0}   = Zt;
+}
+
+multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
+  def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
+  def : InstAlias<mnemonic # "\t$ZTt, $Zt",
+                 (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
+}
+
 //===----------------------------------------------------------------------===//
 // SME2 lookup table expand one register
 class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
diff --git a/llvm/test/MC/AArch64/FP8_SME2/dot-diagnostics.s b/llvm/test/MC/AArch64/FP8_SME2/dot-diagnostics.s
new file mode 100644
index 000000000000000..88a7393b34e90f9
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/dot-diagnostics.s
@@ -0,0 +1,215 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+sme-f8f16,+sme-f8f32  2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+fdot    za.h[w8, 0, vgx2], {z0.h-z1.h}, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot    za.h[w8, 0, vgx2], {z0.h-z1.h}, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w11, 7], {z31.b-z2.b}, z15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: fdot    za.h[w11, 7], {z31.b-z2.b}, z15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w11, 7, vgx2], {z28.b-z31.b}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot    za.h[w11, 7, vgx2], {z28.b-z31.b}, {z0.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w11, 7], {z29.b-z30.b}, {z30.b-z31.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot    za.s[w11, 7], {z29.b-z30.b}, {z30.b-z31.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w11, 7], {z30.b-z0.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot    za.h[w11, 7], {z30.b-z0.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector select offset
+
+fvdott  za.s[w11, -1, vgx4], {z30.b-z31.b}, z15.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fvdott  za.s[w11, -1, vgx4], {z30.b-z31.b}, z15.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdott  za.s[w8, -1, vgx4], {z0.b-z1.b}, z0.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fvdott  za.s[w8, -1, vgx4], {z0.b-z1.b}, z0.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdot   za.h[w11, -1], {z30.b-z31.b}, z15.b[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fvdot   za.h[w11, -1], {z30.b-z31.b}, z15.b[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w11, -1], {z28.b-z31.b}, z15.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fdot    za.s[w11, -1], {z28.b-z31.b}, z15.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdott  za.s[w11, 8, vgx4], {z30.b-z31.b}, z15.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fvdott  za.s[w11, 8, vgx4], {z30.b-z31.b}, z15.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdott  za.s[w8, 8, vgx4], {z0.b-z1.b}, z0.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fvdott  za.s[w8, 8, vgx4], {z0.b-z1.b}, z0.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdot   za.h[w11, 8], {z30.b-z31.b}, z15.b[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fvdot   za.h[w11, 8], {z30.b-z31.b}, z15.b[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w11, 8], {z28.b-z31.b}, z15.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fdot    za.s[w11, 8], {z28.b-z31.b}, z15.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector list
+
+fdot    za.s[w11, 7, vgx4], {z29.b-z1.b}, {z29.b-z1.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fdot    za.s[w11, 7, vgx4], {z29.b-z1.b}, {z29.b-z1.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w11, 7], {z30.b-z2.b}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fdot    za.h[w11, 7], {z30.b-z2.b}, {z0.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w8, 0], {z31.b-z3.b}, {z31.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fdot    za.s[w8, 0], {z31.b-z3.b}, {z31.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w11, 7, vgx2], {z30.b-z31.b}, {z0.b-z4.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fdot    za.s[w11, 7, vgx2], {z30.b-z31.b}, {z0.b-z4.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid Register Suffix
+fdot    za.d[w11, 7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fdot    za.d[w11, 7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za[w11, 7], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fdot    za[w11, 7], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.b[w11, 7], {z31.b-z0.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fdot    za.b[w11, 7], {z31.b-z0.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.b[w11, 7, vgx2], {z30.h-z31.h}, {z30.h-z31.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fdot    za.b[w11, 7, vgx2], {z30.h-z31.h}, {z30.h-z31.h}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za[w11, 7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fdot    za[w11, 7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.d[w11, 7], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fdot    za.d[w11, 7], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+fdot    za.h[w7, 7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fdot    za.h[w7, 7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w, 0, vgx2], {z0.b-z1.b}, z0.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fdot    za.h[w, 0, vgx2], {z0.b-z1.b}, z0.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w12, 0], {z0.b-z3.b}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fdot    za.s[w12, 0], {z0.b-z3.b}, {z0.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid indexed-vector or single-vector register
+
+fdot za.h[w8, 0], {z0.b-z1.b}, z16.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b
+// CHECK-NEXT: fdot za.h[w8, 0], {z0.b-z1.b}, z16.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot za.s[w8, 0], {z0.b-z1.b}, z16.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b
+// CHECK-NEXT:  fdot za.s[w8, 0], {z0.b-z1.b}, z16.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+fdot    za.h[w11, 7], {z28.b-z31.b}, {z0.b-z2.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot    za.h[w11, 7], {z28.b-z31.b}, {z0.b-z2.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w11, 7, vgx4], {z31.b-z0.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot    za.h[w11, 7, vgx4], {z31.b-z0.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid lane index
+
+fdot    za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fdot    za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.h[w11, 7], {z30.b-z31.b}, z15.b[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fdot    za.h[w11, 7], {z30.b-z31.b}, z15.b[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w8, 0], {z0.b-z1.b}, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot    za.s[w8, 0], {z0.b-z1.b}, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot    za.s[w11, 7], {z30.b-z31.b}, z15.b[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot    za.s[w11, 7], {z30.b-z31.b}, z15.b[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdot   za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fvdot   za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdot   za.h[w11, 7], {z30.b-z31.b}, z15.b[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fvdot   za.h[w11, 7], {z30.b-z31.b}, z15.b[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdotb  za.s[w8, 0, vgx4], {z0.b-z1.b}, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fvdotb  za.s[w8, 0, vgx4], {z0.b-z1.b}, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fvdott  za.s[w11, 7, vgx4], {z30.b-z31.b}, z15.b[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fvdott  za.s[w11, 7, vgx4], {z30.b-z31.b}, z15.b[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SME2/dot.s b/llvm/test/MC/AArch64/FP8_SME2/dot.s
new file mode 100644
index 000000000000000..ebee5f9ff5f66f0
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/dot.s
@@ -0,0 +1,361 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-f8f16,+sme-f8f32 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-f8f16,+sme-f8f32 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+// FDOT
+// x2
+
+fdot    za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b  // 11000001-00100000-00010000-00001000
+// CHECK-INST: fdot    za.h[w8, 0, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x08,0x10,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1201008 <unknown>
+
+fdot    za.h[w8, 0], {z0.b-z1.b}, z0.b  // 11000001-00100000-00010000-00001000
+// CHECK-INST: fdot    za.h[w8, 0, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x08,0x10,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1201008 <unknown>
+
+fdot    za.h[w11, 7], {z13.b-z14.b}, z8.b  // 11000001-00101000-01110001-10101111
+// CHECK-INST: fdot    za.h[w11, 7, vgx2], { z13.b, z14.b }, z8.b
+// CHECK-ENCODING: [0xaf,0x71,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c12871af <unknown>
+
+fdot    za.h[w11, 7, vgx2], {z31.b-z0.b}, z15.b  // 11000001-00101111-01110011-11101111
+// CHECK-INST: fdot    za.h[w11, 7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xef,0x73,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c12f73ef <unknown>
+
+fdot    za.s[w8, 0, vgx2], {z0.b-z1.b}, z0.b  // 11000001-00100000-00010000-00011000
+// CHECK-INST: fdot    za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x18,0x10,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1201018 <unknown>
+
+fdot    za.s[w8, 0], {z0.b-z1.b}, z0.b  // 11000001-00100000-00010000-00011000
+// CHECK-INST: fdot    za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x18,0x10,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1201018 <unknown>
+
+fdot    za.s[w11, 7, vgx2], {z31.b-z0.b}, z15.b  // 11000001-00101111-01110011-11111111
+// CHECK-INST: fdot    za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xff,0x73,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c12f73ff <unknown>
+
+fdot    za.s[w11, 7], {z31.b-z0.b}, z15.b  // 11000001-00101111-01110011-11111111
+// CHECK-INST: fdot    za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xff,0x73,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c12f73ff <unknown>
+
+fdot    za.h[w8, 0, vgx2], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00010000-00100000
+// CHECK-INST: fdot    za.h[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x20,0x10,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a01020 <unknown>
+
+fdot    za.h[w8, 0], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00010000-00100000
+// CHECK-INST: fdot    za.h[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x20,0x10,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a01020 <unknown>
+
+fdot    za.h[w11, 7, vgx2], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01110011-11100111
+// CHECK-INST: fdot    za.h[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xe7,0x73,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1be73e7 <unknown>
+
+fdot    za.h[w11, 7], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01110011-11100111
+// CHECK-INST: fdot    za.h[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xe7,0x73,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1be73e7 <unknown>
+
+fdot    za.s[w8, 0, vgx2], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00010000-00110000
+// CHECK-INST: fdot    za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x30,0x10,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a01030 <unknown>
+
+fdot    za.s[w8, 0], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00010000-00110000
+// CHECK-INST: fdot    za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x30,0x10,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a01030 <unknown>
+
+fdot    za.s[w11, 7, vgx2], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01110011-11110111
+// CHECK-INST: fdot    za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xf7,0x73,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1be73f7 <unknown>
+
+fdot    za.s[w11, 7], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01110011-11110111
+// CHECK-INST: fdot    za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xf7,0x73,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1be73f7 <unknown>
+
+fdot    za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b[0]  // 11000001-11010000-00000000-00100000
+// CHECK-INST: fdot    za.h[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x00,0xd0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1d00020 <unknown>
+
+fdot    za.h[w8, 0], {z0.b-z1.b}, z0.b[0]  // 11000001-11010000-00000000-00100000
+// CHECK-INST: fdot    za.h[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x00,0xd0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1d00020 <unknown>
+
+fdot    za.h[w11, 7, vgx2], {z30.b-z31.b}, z15.b[7]  // 11000001-11011111-01101111-11101111
+// CHECK-INST: fdot    za.h[w11, 7, vgx2], { z30.b, z31.b }, z15.b[7]
+// CHECK-ENCODING: [0xef,0x6f,0xdf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1df6fef <unknown>
+
+fdot    za.h[w11, 7], {z30.b-z31.b}, z15.b[7]  // 11000001-11011111-01101111-11101111
+// CHECK-INST: fdot    za.h[w11, 7, vgx2], { z30.b, z31.b }, z15.b[7]
+// CHECK-ENCODING: [0xef,0x6f,0xdf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1df6fef <unknown>
+
+fdot    za.s[w8, 0, vgx2], {z0.b-z1.b}, z0.b[0]  // 11000001-01010000-00000000-00111000
+// CHECK-INST: fdot    za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x38,0x00,0x50,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1500038 <unknown>
+
+fdot    za.s[w8, 0], {z0.b-z1.b}, z0.b[0]  // 11000001-01010000-00000000-00111000
+// CHECK-INST: fdot    za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x38,0x00,0x50,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1500038 <unknown>
+
+fdot    za.s[w11, 7, vgx2], {z30.b-z31.b}, z15.b[3]  // 11000001-01011111-01101111-11111111
+// CHECK-INST: fdot    za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3]
+// CHECK-ENCODING: [0xff,0x6f,0x5f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c15f6fff <unknown>
+
+fdot    za.s[w11, 7], {z30.b-z31.b}, z15.b[3]  // 11000001-01011111-01101111-11111111
+// CHECK-INST: fdot    za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3]
+// CHECK-ENCODING: [0xff,0x6f,0x5f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c15f6fff <unknown>
+
+// x4
+
+
+fdot    za.h[w8, 0, vgx4], {z0.b-z3.b}, z0.b  // 11000001-00110000-00010000-00001000
+// CHECK-INST: fdot    za.h[w8, 0, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x08,0x10,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1301008 <unknown>
+
+fdot    za.h[w8, 0], {z0.b-z3.b}, z0.b  // 11000001-00110000-00010000-00001000
+// CHECK-INST: fdot    za.h[w8, 0, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x08,0x10,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1301008 <unknown>
+
+fdot    za.h[w11, 7, vgx4], {z31.b-z2.b}, z15.b  // 11000001-00111111-01110011-11101111
+// CHECK-INST: fdot    za.h[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b
+// CHECK-ENCODING: [0xef,0x73,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c13f73ef <unknown>
+
+fdot    za.h[w11, 7], {z31.b-z2.b}, z15.b  // 11000001-00111111-01110011-11101111
+// CHECK-INST: fdot    za.h[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b
+// CHECK-ENCODING: [0xef,0x73,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c13f73ef <unknown>
+
+fdot    za.s[w8, 0, vgx4], {z0.b-z3.b}, z0.b  // 11000001-00110000-00010000-00011000
+// CHECK-INST: fdot    za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x18,0x10,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1301018 <unknown>
+
+fdot    za.s[w8, 0], {z0.b-z3.b}, z0.b  // 11000001-00110000-00010000-00011000
+// CHECK-INST: fdot    za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x18,0x10,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1301018 <unknown>
+
+fdot    za.s[w11, 7, vgx4], {z31.b-z2.b}, z15.b  // 11000001-00111111-01110011-11111111
+// CHECK-INST: fdot    za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b
+// CHECK-ENCODING: [0xff,0x73,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c13f73ff <unknown>
+
+fdot    za.s[w11, 7], {z31.b-z2.b}, z15.b  // 11000001-00111111-01110011-11111111
+// CHECK-INST: fdot    za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b
+// CHECK-ENCODING: [0xff,0x73,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c13f73ff <unknown>
+
+fdot    za.h[w8, 0, vgx4], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00010000-00100000
+// CHECK-INST: fdot    za.h[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x20,0x10,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a11020 <unknown>
+
+fdot    za.h[w8, 0], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00010000-00100000
+// CHECK-INST: fdot    za.h[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x20,0x10,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a11020 <unknown>
+
+fdot    za.h[w11, 7, vgx4], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01110011-10100111
+// CHECK-INST: fdot    za.h[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xa7,0x73,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1bd73a7 <unknown>
+
+fdot    za.h[w11, 7], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01110011-10100111
+// CHECK-INST: fdot    za.h[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xa7,0x73,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1bd73a7 <unknown>
+fdot    za.h[w8, 0, vgx4], {z0.b-z3.b}, z0.b[0]  // 11000001-00010000-10010000-01000000
+// CHECK-INST: fdot    za.h[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x40,0x90,0x10,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1109040 <unknown>
+
+fdot    za.s[w8, 0, vgx4], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00010000-00110000
+// CHECK-INST: fdot    za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x30,0x10,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a11030 <unknown>
+
+fdot    za.s[w8, 0], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00010000-00110000
+// CHECK-INST: fdot    za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x30,0x10,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a11030 <unknown>
+
+fdot    za.s[w11, 7, vgx4], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01110011-10110111
+// CHECK-INST: fdot    za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xb7,0x73,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1bd73b7 <unknown>
+
+fdot    za.s[w11, 7], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01110011-10110111
+// CHECK-INST: fdot    za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xb7,0x73,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1bd73b7 <unknown>
+
+fdot    za.h[w8, 0], {z0.b-z3.b}, z0.b[0]  // 11000001-00010000-10010000-01000000
+// CHECK-INST: fdot    za.h[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x40,0x90,0x10,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1109040 <unknown>
+
+fdot    za.h[w11, 7, vgx4], {z28.b-z31.b}, z15.b[7]  // 11000001-00011111-11111111-11001111
+// CHECK-INST: fdot    za.h[w11, 7, vgx4], { z28.b - z31.b }, z15.b[7]
+// CHECK-ENCODING: [0xcf,0xff,0x1f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c11fffcf <unknown>
+
+fdot    za.h[w11, 7], {z28.b-z31.b}, z15.b[7]  // 11000001-00011111-11111111-11001111
+// CHECK-INST: fdot    za.h[w11, 7, vgx4], { z28.b - z31.b }, z15.b[7]
+// CHECK-ENCODING: [0xcf,0xff,0x1f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c11fffcf <unknown>
+
+fdot    za.s[w8, 0, vgx4], {z0.b-z3.b}, z0.b[0]  // 11000001-01010000-10000000-00001000
+// CHECK-INST: fdot    za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x08,0x80,0x50,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1508008 <unknown>
+
+fdot    za.s[w8, 0], {z0.b-z3.b}, z0.b[0]  // 11000001-01010000-10000000-00001000
+// CHECK-INST: fdot    za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x08,0x80,0x50,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1508008 <unknown>
+
+fdot    za.s[w11, 7, vgx4], {z28.b-z31.b}, z15.b[3]  // 11000001-01011111-11101111-10001111
+// CHECK-INST: fdot    za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3]
+// CHECK-ENCODING: [0x8f,0xef,0x5f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c15fef8f <unknown>
+
+fdot    za.s[w11, 7], {z28.b-z31.b}, z15.b[3]  // 11000001-01011111-11101111-10001111
+// CHECK-INST: fdot    za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3]
+// CHECK-ENCODING: [0x8f,0xef,0x5f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c15fef8f <unknown>
+
+
+// FVDOT
+
+fvdot   za.h[w8, 0, vgx2], {z0.b-z1.b}, z0.b[0]  // 11000001-11010000-00010000-00100000
+// CHECK-INST: fvdot   za.h[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x10,0xd0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1d01020 <unknown>
+
+fvdot   za.h[w8, 0], {z0.b-z1.b}, z0.b[0]  // 11000001-11010000-00010000-00100000
+// CHECK-INST: fvdot   za.h[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x10,0xd0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1d01020 <unknown>
+
+fvdot   za.h[w11, 7, vgx2], {z30.b-z31.b}, z15.b[7]  // 11000001-11011111-01111111-11101111
+// CHECK-INST: fvdot   za.h[w11, 7, vgx2], { z30.b, z31.b }, z15.b[7]
+// CHECK-ENCODING: [0xef,0x7f,0xdf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1df7fef <unknown>
+
+fvdot   za.h[w11, 7], {z30.b-z31.b}, z15.b[7]  // 11000001-11011111-01111111-11101111
+// CHECK-INST: fvdot   za.h[w11, 7, vgx2], { z30.b, z31.b }, z15.b[7]
+// CHECK-ENCODING: [0xef,0x7f,0xdf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1df7fef <unknown>
+
+// FVDOTB
+
+fvdotb  za.s[w8, 0, vgx4], {z0.b-z1.b}, z0.b[0]  // 11000001-11010000-00001000-00000000
+// CHECK-INST: fvdotb  za.s[w8, 0, vgx4], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x00,0x08,0xd0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1d00800 <unknown>
+
+fvdotb  za.s[w11, 7, vgx4], {z30.b-z31.b}, z15.b[3]  // 11000001-11011111-01101111-11001111
+// CHECK-INST: fvdotb  za.s[w11, 7, vgx4], { z30.b, z31.b }, z15.b[3]
+// CHECK-ENCODING: [0xcf,0x6f,0xdf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1df6fcf <unknown>
+
+// FVDOTT
+fvdott  za.s[w8, 0, vgx4], {z0.b-z1.b}, z0.b[0]  // 11000001-11010000-00001000-00010000
+// CHECK-INST: fvdott  za.s[w8, 0, vgx4], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x10,0x08,0xd0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1d00810 <unknown>
+
+fvdott  za.s[w11, 7, vgx4], {z30.b-z31.b}, z15.b[3]  // 11000001-11011111-01101111-11011111
+// CHECK-INST: fvdott  za.s[w11, 7, vgx4], { z30.b, z31.b }, z15.b[3]
+// CHECK-ENCODING: [0xdf,0x6f,0xdf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1df6fdf <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SME2/mla-diagnostics.s b/llvm/test/MC/AArch64/FP8_SME2/mla-diagnostics.s
new file mode 100644
index 000000000000000..b16875aa664e272
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/mla-diagnostics.s
@@ -0,0 +1,195 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+sme-f8f16,+sme-f8f32  2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand combinations
+
+fmlal    za.h[w8, 0:1, vgx2], {z0.h-z1.h}, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlal    za.h[w8, 0:1, vgx2], {z0.h-z1.h}, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w11, 4:7], {z31.b-z2.b}, z15
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: fmlal    za.h[w11, 4:7], {z31.b-z2.b}, z15
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w11, 6:7, vgx2], {z28.b-z31.b}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlal    za.h[w11, 6:7, vgx2], {z28.b-z31.b}, {z0.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.s[w11, 0:3], {z29.b-z30.b}, {z30.b-z31.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlall    za.s[w11, 0:3], {z29.b-z30.b}, {z30.b-z31.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.s[w11, 4:7], {z30.b-z0.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlall    za.s[w11, 4:7], {z30.b-z0.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector select offset
+
+fmlal   za.h[w11, 1:2], {z30.b-z31.b}, z15.b[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 2 in the range [0, 6] or [0, 14] depending on the instruction, and the second immediate is immf + 1.
+// CHECK-NEXT: fmlal   za.h[w11, 1:2], {z30.b-z31.b}, z15.b[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w11, 3:4], {z28.b-z31.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 2 in the range [0, 6] or [0, 14] depending on the instruction, and the second immediate is immf + 1.
+// CHECK-NEXT: fmlal    za.h[w11, 3:4], {z28.b-z31.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w11, 7:8, vgx4], {z28.b-z31.b}, {z4.b-z7.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 2 in the range [0, 6] or [0, 14] depending on the instruction, and the second immediate is immf + 1.
+// CHECK-NEXT: fmlal    za.h[w11, 7:8, vgx4], {z28.b-z31.b}, {z4.b-z7.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w11, 3:6, vgx4], {z30.b-z31.b}, z15.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3.
+// CHECK-NEXT: fmlall  za.s[w11, 3:6, vgx4], {z30.b-z31.b}, z15.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w8, 3:6, vgx4], {z0.b-z3.b}, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3.
+// CHECK-NEXT: fmlall  za.s[w8, 3:6, vgx4], {z0.b-z3.b}, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w11, 7:10, vgx4], {z30.b-z31.b}, {z12.b-z13.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3.
+// CHECK-NEXT: fmlall  za.s[w11, 7:10, vgx4], {z30.b-z31.b}, {z12.b-z13.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector list
+
+fmlal    za.h[w11, 4:7, vgx4], {z29.b-z1.b}, {z29.b-z1.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fmlal    za.h[w11, 4:7, vgx4], {z29.b-z1.b}, {z29.b-z1.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w11, 4:7], {z30.b-z2.b}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fmlal    za.h[w11, 4:7], {z30.b-z2.b}, {z0.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.s[w8, 0:1], {z31.b-z3.b}, {z31.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fmlall    za.s[w8, 0:1], {z31.b-z3.b}, {z31.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.s[w11, 6:7, vgx2], {z30.b-z31.b}, {z0.b-z4.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: fmlall    za.s[w11, 6:7, vgx2], {z30.b-z31.b}, {z0.b-z4.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid Register Suffix
+fmlal    za.d[w11, 4:5, vgx4], {z31.b-z2.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fmlal    za.d[w11, 4:5, vgx4], {z31.b-z2.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za[w11, 2:3], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fmlal    za[w11, 2:3], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.b[w11, 6:7], {z31.b-z0.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fmlal    za.b[w11, 6:7], {z31.b-z0.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.b[w11, 6:7, vgx2], {z30.h-z31.h}, {z30.h-z31.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fmlall    za.b[w11, 6:7, vgx2], {z30.h-z31.h}, {z30.h-z31.h}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za[w11, 4:7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fmlall    za[w11, 4:7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.d[w11, 12:15], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
+// CHECK-NEXT: fmlall    za.d[w11, 12:15], {z28.b-z31.b}, {z28.b-z31.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+fmlal    za.h[w7, 4:7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmlal    za.h[w7, 4:7, vgx4], {z31.b-z2.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w, 0:1, vgx2], {z0.b-z1.b}, z0.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmlal    za.h[w, 0:1, vgx2], {z0.b-z1.b}, z0.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.s[w12, 0:3], {z0.b-z3.b}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmlall    za.s[w12, 0:3], {z0.b-z3.b}, {z0.b-z3.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid indexed-vector or single-vector register
+
+fmlal za.h[w8, 0:1], {z0.b-z1.b}, z16.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b
+// CHECK-NEXT: fmlal za.h[w8, 0:1], {z0.b-z1.b}, z16.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal   za.h[w9, 14:15], z31.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b
+// CHECK-NEXT: fmlal   za.h[w9, 14:15], z31.b, z16.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w11, 8:11], z9.b, z16.b[13]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b
+// CHECK-NEXT: fmlall  za.s[w11, 8:11], z9.b, z16.b[13]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w11, 12:15], z31.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b
+// CHECK-NEXT: fmlall  za.s[w11, 12:15], z31.b, z16.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+fmlal    za.h[w11, 10:11], {z28.b-z31.b}, {z0.b-z2.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlal    za.h[w11, 10:11], {z28.b-z31.b}, {z0.b-z2.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall    za.s[w11, 4:7, vgx4], {z31.b-z0.b}, z15.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlall    za.s[w11, 4:7, vgx4], {z31.b-z0.b}, z15.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid lane index
+
+fmlal   za.h[w11, 14:15], z31.b, z15.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: fmlal   za.h[w11, 14:15], z31.b, z15.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlal    za.h[w11, 2:3], {z30.b-z31.b}, z15.b[16]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: fmlal    za.h[w11, 2:3], {z30.b-z31.b}, z15.b[16]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w9, 12:15], z12.b, z11.b[16]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: fmlall  za.s[w9, 12:15], z12.b, z11.b[16]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlall  za.s[w8, 4:7], {z16.b-z19.b}, z0.b[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: fmlall  za.s[w8, 4:7], {z16.b-z19.b}, z0.b[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SME2/mla.s b/llvm/test/MC/AArch64/FP8_SME2/mla.s
new file mode 100644
index 000000000000000..833ea706da9666b
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/mla.s
@@ -0,0 +1,361 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-f8f16,+sme-f8f32 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-f8f16,+sme-f8f32 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+fmlal   za.h[w8, 0:1], z0.b, z0.b  // 11000001-00110000-00001100-00000000
+// CHECK-INST: fmlal   za.h[w8, 0:1], z0.b, z0.b
+// CHECK-ENCODING: [0x00,0x0c,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1300c00 <unknown>
+
+fmlal   za.h[w11, 14:15], z31.b, z15.b  // 11000001-00111111-01101111-11100111
+// CHECK-INST: fmlal   za.h[w11, 14:15], z31.b, z15.b
+// CHECK-ENCODING: [0xe7,0x6f,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c13f6fe7 <unknown>
+
+fmlal   za.h[w8, 0:1], z0.b, z0.b[0]  // 11000001-11000000-00000000-00000000
+// CHECK-INST: fmlal   za.h[w8, 0:1], z0.b, z0.b[0]
+// CHECK-ENCODING: [0x00,0x00,0xc0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1c00000 <unknown>
+
+fmlal   za.h[w11, 14:15], z31.b, z15.b[15]  // 11000001-11001111-11101111-11101111
+// CHECK-INST: fmlal   za.h[w11, 14:15], z31.b, z15.b[15]
+// CHECK-ENCODING: [0xef,0xef,0xcf,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1cfefef <unknown>
+
+// x2
+
+fmlal   za.h[w8, 0:1, vgx2], {z0.b-z1.b}, z0.b  // 11000001-00100000-00001000-00000100
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x04,0x08,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1200804 <unknown>
+
+fmlal   za.h[w8, 0:1], {z0.b-z1.b}, z0.b  // 11000001-00100000-00001000-00000100
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x04,0x08,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1200804 <unknown>
+
+fmlal   za.h[w11, 6:7, vgx2], {z31.b-z0.b}, z15.b  // 11000001-00101111-01101011-11100111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xe7,0x6b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c12f6be7 <unknown>
+
+fmlal   za.h[w11, 6:7], {z31.b-z0.b}, z15.b  // 11000001-00101111-01101011-11100111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xe7,0x6b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c12f6be7 <unknown>
+
+fmlal   za.h[w8, 0:1, vgx2], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00001000-00100000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x20,0x08,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a00820 <unknown>
+
+fmlal   za.h[w8, 0:1], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00001000-00100000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x20,0x08,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a00820 <unknown>
+
+fmlal   za.h[w11, 6:7, vgx2], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01101011-11100011
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xe3,0x6b,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1be6be3 <unknown>
+
+fmlal   za.h[w11, 6:7], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01101011-11100011
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xe3,0x6b,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1be6be3 <unknown>
+
+fmlal   za.h[w8, 0:1, vgx2], {z0.b-z1.b}, z0.b[0]  // 11000001-10010000-00010000-00110000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x30,0x10,0x90,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1901030 <unknown>
+
+fmlal   za.h[w8, 0:1], {z0.b-z1.b}, z0.b[0]  // 11000001-10010000-00010000-00110000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x30,0x10,0x90,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1901030 <unknown>
+
+fmlal   za.h[w11, 6:7, vgx2], {z30.b-z31.b}, z15.b[15]  // 11000001-10011111-01111111-11111111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx2], { z30.b, z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xff,0x7f,0x9f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c19f7fff <unknown>
+
+fmlal   za.h[w11, 6:7], {z30.b-z31.b}, z15.b[15]  // 11000001-10011111-01111111-11111111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx2], { z30.b, z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xff,0x7f,0x9f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c19f7fff <unknown>
+
+// x4
+
+fmlal   za.h[w8, 0:1, vgx4], {z0.b-z3.b}, z0.b  // 11000001-00110000-00001000-00000100
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x04,0x08,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1300804 <unknown>
+
+fmlal   za.h[w8, 0:1], {z0.b-z3.b}, z0.b  // 11000001-00110000-00001000-00000100
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x04,0x08,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1300804 <unknown>
+
+fmlal   za.h[w11, 6:7, vgx4], {z31.b-z2.b}, z15.b  // 11000001-00111111-01101011-11100111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx4], {  z31.b, z0.b, z1.b, z2.b  }, z15.b
+// CHECK-ENCODING: [0xe7,0x6b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c13f6be7 <unknown>
+
+fmlal   za.h[w11, 6:7], {z31.b-z2.b}, z15.b  // 11000001-00111111-01101011-11100111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx4], {  z31.b, z0.b, z1.b, z2.b  }, z15.b
+// CHECK-ENCODING: [0xe7,0x6b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c13f6be7 <unknown>
+
+fmlal   za.h[w8, 0:1, vgx4], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00001000-00100000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x20,0x08,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a10820 <unknown>
+
+fmlal   za.h[w8, 0:1], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00001000-00100000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x20,0x08,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1a10820 <unknown>
+
+fmlal   za.h[w11, 6:7, vgx4], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01101011-10100011
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xa3,0x6b,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1bd6ba3 <unknown>
+
+fmlal   za.h[w11, 6:7], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01101011-10100011
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xa3,0x6b,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1bd6ba3 <unknown>
+
+fmlal   za.h[w8, 0:1, vgx4], {z0.b-z3.b}, z0.b[0]  // 11000001-10010000-10010000-00100000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x90,0x90,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1909020 <unknown>
+
+fmlal   za.h[w8, 0:1], {z0.b-z3.b}, z0.b[0]  // 11000001-10010000-10010000-00100000
+// CHECK-INST: fmlal   za.h[w8, 0:1, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x90,0x90,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c1909020 <unknown>
+
+fmlal   za.h[w11, 6:7, vgx4], {z28.b-z31.b}, z15.b[15]  // 11000001-10011111-11111111-10101111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx4], { z28.b - z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xaf,0xff,0x9f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c19fffaf <unknown>
+
+fmlal   za.h[w11, 6:7], {z28.b-z31.b}, z15.b[15]  // 11000001-10011111-11111111-10101111
+// CHECK-INST: fmlal   za.h[w11, 6:7, vgx4], { z28.b - z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xaf,0xff,0x9f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: c19fffaf <unknown>
+
+
+//FMLALL
+
+fmlall  za.s[w8, 0:3], z0.b, z0.b  // 11000001-00110000-00000100-00000000
+// CHECK-INST: fmlall  za.s[w8, 0:3], z0.b, z0.b
+// CHECK-ENCODING: [0x00,0x04,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1300400 <unknown>
+
+fmlall  za.s[w11, 12:15], z31.b, z15.b  // 11000001-00111111-01100111-11100011
+// CHECK-INST: fmlall  za.s[w11, 12:15], z31.b, z15.b
+// CHECK-ENCODING: [0xe3,0x67,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c13f67e3 <unknown>
+
+fmlall  za.s[w8, 0:3], z0.b, z0.b[0]  // 11000001-01000000-00000000-00000000
+// CHECK-INST: fmlall  za.s[w8, 0:3], z0.b, z0.b[0]
+// CHECK-ENCODING: [0x00,0x00,0x40,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1400000 <unknown>
+
+fmlall  za.s[w11, 12:15], z31.b, z15.b[15]  // 11000001-01001111-11111111-11100011
+// CHECK-INST: fmlall  za.s[w11, 12:15], z31.b, z15.b[15]
+// CHECK-ENCODING: [0xe3,0xff,0x4f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c14fffe3 <unknown>
+
+// x2
+
+fmlall  za.s[w8, 0:3, vgx2], {z0.b-z1.b}, z0.b  // 11000001-00100000-00000000-00000010
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x02,0x00,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1200002 <unknown>
+
+fmlall  za.s[w8, 0:3], {z0.b-z1.b}, z0.b  // 11000001-00100000-00000000-00000010
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b
+// CHECK-ENCODING: [0x02,0x00,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1200002 <unknown>
+
+fmlall  za.s[w11, 4:7, vgx2], {z31.b-z0.b}, z15.b  // 11000001-00101111-01100011-11100011
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xe3,0x63,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c12f63e3 <unknown>
+
+fmlall  za.s[w11, 4:7], {z31.b-z0.b}, z15.b  // 11000001-00101111-01100011-11100011
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b
+// CHECK-ENCODING: [0xe3,0x63,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c12f63e3 <unknown>
+
+fmlall  za.s[w8, 0:3, vgx2], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00000000-00100000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x20,0x00,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a00020 <unknown>
+
+fmlall  za.s[w8, 0:3], {z0.b-z1.b}, {z0.b-z1.b}  // 11000001-10100000-00000000-00100000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b }
+// CHECK-ENCODING: [0x20,0x00,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a00020 <unknown>
+
+fmlall  za.s[w11, 4:7, vgx2], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01100011-11100001
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xe1,0x63,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1be63e1 <unknown>
+
+fmlall  za.s[w11, 4:7], {z30.b-z31.b}, {z30.b-z31.b}  // 11000001-10111110-01100011-11100001
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xe1,0x63,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1be63e1 <unknown>
+
+fmlall  za.s[w8, 0:3, vgx2], {z0.b-z1.b}, z0.b[0]  // 11000001-10010000-00000000-00100000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x00,0x90,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1900020 <unknown>
+
+fmlall  za.s[w8, 0:3], {z0.b-z1.b}, z0.b[0]  // 11000001-10010000-00000000-00100000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0]
+// CHECK-ENCODING: [0x20,0x00,0x90,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1900020 <unknown>
+
+fmlall  za.s[w11, 4:7, vgx2], {z30.b-z31.b}, z15.b[15]  // 11000001-10011111-01101111-11100111
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xe7,0x6f,0x9f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c19f6fe7 <unknown>
+
+fmlall  za.s[w11, 4:7], {z30.b-z31.b}, z15.b[15]  // 11000001-10011111-01101111-11100111
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xe7,0x6f,0x9f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c19f6fe7 <unknown>
+
+// x4
+
+fmlall  za.s[w8, 0:3, vgx4], {z0.b-z3.b}, z0.b  // 11000001-00110000-00000000-00000010
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x02,0x00,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1300002 <unknown>
+
+fmlall  za.s[w8, 0:3], {z0.b-z3.b}, z0.b  // 11000001-00110000-00000000-00000010
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b
+// CHECK-ENCODING: [0x02,0x00,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1300002 <unknown>
+
+fmlall  za.s[w11, 4:7, vgx4], {z31.b-z2.b}, z15.b  // 11000001-00111111-01100011-11100011
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx4], {  z31.b, z0.b, z1.b, z2.b  }, z15.b
+// CHECK-ENCODING: [0xe3,0x63,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c13f63e3 <unknown>
+
+fmlall  za.s[w11, 4:7], {z31.b-z2.b}, z15.b  // 11000001-00111111-01100011-11100011
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx4], {  z31.b, z0.b, z1.b, z2.b  }, z15.b
+// CHECK-ENCODING: [0xe3,0x63,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c13f63e3 <unknown>
+
+fmlall  za.s[w8, 0:3, vgx4], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00000000-00100000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x20,0x00,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a10020 <unknown>
+
+fmlall  za.s[w8, 0:3], {z0.b-z3.b}, {z0.b-z3.b}  // 11000001-10100001-00000000-00100000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b }
+// CHECK-ENCODING: [0x20,0x00,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1a10020 <unknown>
+
+fmlall  za.s[w11, 4:7, vgx4], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01100011-10100001
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xa1,0x63,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1bd63a1 <unknown>
+
+fmlall  za.s[w11, 4:7], {z28.b-z31.b}, {z28.b-z31.b}  // 11000001-10111101-01100011-10100001
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b }
+// CHECK-ENCODING: [0xa1,0x63,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1bd63a1 <unknown>
+
+fmlall  za.s[w8, 0:3, vgx4], {z0.b-z3.b}, z0.b[0]  // 11000001-00010000-10000000-01000000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x40,0x80,0x10,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1108040 <unknown>
+
+fmlall  za.s[w8, 0:3], {z0.b-z3.b}, z0.b[0]  // 11000001-00010000-10000000-01000000
+// CHECK-INST: fmlall  za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0]
+// CHECK-ENCODING: [0x40,0x80,0x10,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c1108040 <unknown>
+
+fmlall  za.s[w11, 4:7, vgx4], {z28.b-z31.b}, z15.b[15]  // 11000001-00011111-11101111-11000111
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xc7,0xef,0x1f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c11fefc7 <unknown>
+
+fmlall  za.s[w11, 4:7], {z28.b-z31.b}, z15.b[15]  // 11000001-00011111-11101111-11000111
+// CHECK-INST: fmlall  za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15]
+// CHECK-ENCODING: [0xc7,0xef,0x1f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: c11fefc7 <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SME2/mopa-diagnostics.s b/llvm/test/MC/AArch64/FP8_SME2/mopa-diagnostics.s
new file mode 100644
index 000000000000000..618f3268caebe66
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/mopa-diagnostics.s
@@ -0,0 +1,46 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+sme-lutv2 2>&1 < %s | FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate register
+
+fmopa   za0.h, p8/m, p0/m, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmopa   za0.h, p8/m, p0/m, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmopa   za0.h, p0/m, p8/m, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmopa   za0.h, p0/m, p8/m, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmopa   za0.s, p8/m, p0/m, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fmopa   za0.s, p8/m, p0/m, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmopa   za3.s, p7/m, p8/m, z31.b, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fmopa   za3.s, p7/m, p8/m, z31.b, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid matrix operand
+
+fmopa   za2.h, p0/m, p0/m, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmopa   za2.h, p0/m, p0/m, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid register suffixes
+
+fmopa   za0.h, p0/m, p0/m, z0.b, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmopa   za0.h, p0/m, p0/m, z0.b, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmopa   za3.s, p7/m, p0/m, z31.b, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmopa   za3.s, p7/m, p0/m, z31.b, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SME2/mopa.s b/llvm/test/MC/AArch64/FP8_SME2/mopa.s
new file mode 100644
index 000000000000000..ee9c2c56f8aded5
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/mopa.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-f8f16,+sme-f8f32 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f8f16,+sme-f8f32 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-f8f16,+sme-f8f32 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmopa   za0.h, p0/m, p0/m, z0.b, z0.b  // 10000000-10100000-00000000-00001000
+// CHECK-INST: fmopa   za0.h, p0/m, p0/m, z0.b, z0.b
+// CHECK-ENCODING: [0x08,0x00,0xa0,0x80]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: 80a00008 <unknown>
+
+
+fmopa   za1.h, p7/m, p7/m, z31.b, z31.b  // 10000000-10111111-11111111-11101001
+// CHECK-INST: fmopa   za1.h, p7/m, p7/m, z31.b, z31.b
+// CHECK-ENCODING: [0xe9,0xff,0xbf,0x80]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f16
+// CHECK-UNKNOWN: 80bfffe9 <unknown>
+
+
+fmopa   za0.s, p0/m, p0/m, z0.b, z0.b  // 10000000-10100000-00000000-00000000
+// CHECK-INST: fmopa   za0.s, p0/m, p0/m, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0x00,0xa0,0x80]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: 80a00000 <unknown>
+
+fmopa   za3.s, p7/m, p7/m, z31.b, z31.b  // 10000000-10111111-11111111-11100011
+// CHECK-INST: fmopa   za3.s, p7/m, p7/m, z31.b, z31.b
+// CHECK-ENCODING: [0xe3,0xff,0xbf,0x80]
+// CHECK-ERROR: instruction requires: sme2 sme-f8f32
+// CHECK-UNKNOWN: 80bfffe3 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2/fmlal-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmlal-diagnostics.s
index 3fa960bcc4d29b9..ebc725606fb1fb3 100644
--- a/llvm/test/MC/AArch64/SME2/fmlal-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/fmlal-diagnostics.s
@@ -55,9 +55,9 @@ fmlal za.s[w8, 9:10, vgx2], {z12.h-z13.h}, {z8.h-z9.h}
 // --------------------------------------------------------------------------//
 // Invalid Register Suffix
 
-fmlal za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h}
+fmlal za.d[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h}
 // CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
-// CHECK-NEXT: fmlal za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h}
+// CHECK-NEXT: fmlal za.d[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h}
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 // --------------------------------------------------------------------------//
diff --git a/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s
index d991207ee1a739b..696798eaaa0786a 100644
--- a/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s
@@ -42,9 +42,9 @@ fvdot za.s[w8, 0, vgx2], {z1.h-z2.h}, z0.h[0]
 // --------------------------------------------------------------------------//
 // Invalid Matrix Operand
 
-fvdot za.h[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0]
+fvdot za.b[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0]
 // CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s
-// CHECK-NEXT: fvdot za.h[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0]
+// CHECK-NEXT: fvdot za.b[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 // --------------------------------------------------------------------------//
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index d4c5d68cb3ca7cb..570b155875f68be 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1745,7 +1745,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_FAMINMAX,     AArch64::AEK_FP8FMA,
       AArch64::AEK_SSVE_FP8FMA,  AArch64::AEK_FP8DOT2,
       AArch64::AEK_SSVE_FP8DOT2, AArch64::AEK_FP8DOT4,
-      AArch64::AEK_SSVE_FP8DOT4};
+      AArch64::AEK_SSVE_FP8DOT4, AArch64::AEK_SMEF8F16,
+      AArch64::AEK_SMEF8F32};
 
   std::vector<StringRef> Features;
 
@@ -1826,6 +1827,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4"));
   EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f16"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f32"));
 
   // Assuming we listed every extension above, this should produce the same
   // result. (note that AEK_NONE doesn't have a name so it won't be in the
@@ -1958,6 +1961,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"ssve-fp8dot2", "nossve-fp8dot2", "+ssve-fp8dot2", "-ssve-fp8dot2"},
       {"fp8dot4", "nofp8dot4", "+fp8dot4", "-fp8dot4"},
       {"ssve-fp8dot4", "nossve-fp8dot4", "+ssve-fp8dot4", "-ssve-fp8dot4"},
+      {"sme-f8f16", "nosme-f8f16", "+sme-f8f16", "-sme-f8f16"},
+      {"sme-f8f32", "nosme-f8f32", "+sme-f8f32", "-sme-f8f32"},
   };
 
   for (unsigned i = 0; i < std::size(ArchExt); i++) {



More information about the llvm-commits mailing list