[llvm] 2ecbe8c - [AArch64] SME2 Single-multi vector ternary int/FP 2 and 4 registers

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 19 09:50:58 PDT 2022


Author: Caroline Concatto
Date: 2022-10-19T17:49:48+01:00
New Revision: 2ecbe8c38c99174e91f3f4627c01ea215af527ed

URL: https://github.com/llvm/llvm-project/commit/2ecbe8c38c99174e91f3f4627c01ea215af527ed
DIFF: https://github.com/llvm/llvm-project/commit/2ecbe8c38c99174e91f3f4627c01ea215af527ed.diff

LOG: [AArch64] SME2 Single-multi vector ternary int/FP 2 and 4 registers

This patch adds the assembly/disassembly for the following instructions:

For INT:
    ADD(array results, multiple and single vector): Add replicated single
        vector to multi-vector with ZA array vector results.
    SUB(array results, multiple and single vector): Subtract replicated single
        vector from multi-vector with ZA array vector results.
For FP:
    FMLA (multiple and single vector): Multi-vector floating-point fused
          multiply-add by vector.
    FMLS (multiple and single vector): Multi-vector floating-point fused
          multiply-subtract by vector.
The reference can be found here:

https://developer.arm.com/documentation/ddi0602/2022-09

The Matrix operand has two new element sizes, 32 bits (.s) and 64 bits (.d)
(MatrixOp32 and MatrixOp64).

Depends on: D135448

Depends on:  D135952

Differential Revision: https://reviews.llvm.org/D135455

Added: 
    llvm/test/MC/AArch64/SME2/add-diagnostics.s
    llvm/test/MC/AArch64/SME2/add.s
    llvm/test/MC/AArch64/SME2/directive-arch.s
    llvm/test/MC/AArch64/SME2/fmla-diagnostics.s
    llvm/test/MC/AArch64/SME2/fmla.s
    llvm/test/MC/AArch64/SME2/fmls-diagnostics.s
    llvm/test/MC/AArch64/SME2/fmls.s
    llvm/test/MC/AArch64/SME2/sub-diagnostics.s
    llvm/test/MC/AArch64/SME2/sub.s

Modified: 
    llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
    llvm/lib/Target/AArch64/AArch64RegisterInfo.td
    llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
    llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
    llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
    llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
    llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
    llvm/lib/Target/AArch64/SMEInstrFormats.td
    llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
    llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
    llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
    llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir
    llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 1cb2a4dcc836e..7cb2c828230e1 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -933,6 +933,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   case AArch64::FPR128RegClassID:
     return 32;
 
+  case AArch64::MatrixIndexGPR32_8_11RegClassID:
   case AArch64::MatrixIndexGPR32_12_15RegClassID:
     return 4;
 

diff  --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index a2576155dbe47..5877819a49240 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1334,7 +1334,7 @@ def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
 //
 
 class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
-  let Name = "Matrix";
+  let Name = "Matrix" # !if(EltSize, !cast<string>(EltSize), "");
   let DiagnosticType = "Invalid" # Name;
   let ParserMethod = "tryParseMatrixRegister";
   let RenderMethod = "addMatrixOperands";
@@ -1349,6 +1349,9 @@ class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
 }
 
 def MatrixOp : MatrixOperand<MPR, 0>;
+// SME2 register operands and classes
+def MatrixOp32 : MatrixOperand<MPR, 32>;
+def MatrixOp64 : MatrixOperand<MPR, 64>;
 
 class MatrixTileListAsmOperand : AsmOperandClass {
   let Name = "MatrixTileList";
@@ -1366,11 +1369,17 @@ class MatrixTileListOperand : Operand<i8> {
 
 def MatrixTileList : MatrixTileListOperand<>;
 
+def MatrixIndexGPR32_8_11 : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 8, 11)> {
+   let DiagnosticType = "InvalidMatrixIndexGPR32_8_11";
+}
 def MatrixIndexGPR32_12_15 : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 12, 15)> {
   let DiagnosticType = "InvalidMatrixIndexGPR32_12_15";
 }
+def MatrixIndexGPR32Op8_11 : RegisterOperand<MatrixIndexGPR32_8_11> {
+  let EncoderMethod = "encodeMatrixIndexGPR32<AArch64::W8>";
+}
 def MatrixIndexGPR32Op12_15 : RegisterOperand<MatrixIndexGPR32_12_15> {
-  let EncoderMethod = "encodeMatrixIndexGPR32";
+  let EncoderMethod = "encodeMatrixIndexGPR32<AArch64::W12>";
 }
 
 def SVCROperand : AsmOperandClass {

diff  --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 924cc4f8ae94b..bd8b3b5275bd6 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -242,3 +242,37 @@ defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, int_aarch64_sve_uclamp>;
 defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
 
 } // End let Predicates = [HasSME]
+
+//===----------------------------------------------------------------------===//
+// SME2 Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSME2] in {
+defm ADD_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"add", 0b10>;
+defm ADD_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"add", 0b10>;
+
+defm SUB_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"sub", 0b11>;
+defm SUB_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"sub", 0b11>;
+
+defm FMLA_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"fmla", 0b00>;
+defm FMLA_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"fmla", 0b00>;
+
+defm FMLS_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"fmls", 0b01>;
+defm FMLS_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"fmls", 0b01>;
+}
+
+
+let Predicates = [HasSME2, HasSMEI16I64] in {
+defm ADD_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"add", 0b10>;
+defm ADD_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"add", 0b10>;
+
+defm SUB_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"sub", 0b11>;
+defm SUB_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"sub", 0b11>;
+}
+
+let Predicates = [HasSME2, HasSMEF64F64] in {
+defm FMLA_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"fmla", 0b00>;
+defm FMLA_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"fmla", 0b00>;
+
+defm FMLS_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"fmls", 0b01>;
+defm FMLS_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"fmls", 0b01>;
+}

diff  --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c9b3e227c6af3..c89f1806638ff 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -165,6 +165,7 @@ class AArch64AsmParser : public MCTargetAsmParser {
   bool parseSymbolicImmVal(const MCExpr *&ImmVal);
   bool parseNeonVectorList(OperandVector &Operands);
   bool parseOptionalMulOperand(OperandVector &Operands);
+  bool parseOptionalVGOperand(OperandVector &Operands, StringRef &VecGroup);
   bool parseKeywordOperand(OperandVector &Operands);
   bool parseOperand(OperandVector &Operands, bool isCondCode,
                     bool invertCondCode);
@@ -3136,10 +3137,22 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
 
   StringRef Name = Tok.getString();
 
-  if (Name.equals_insensitive("za")) {
-    Lex(); // eat "za"
+  if (Name.equals_insensitive("za") || Name.startswith_insensitive("za.")) {
+    Lex(); // eat "za[.(b|h|s|d)]"
+    unsigned ElementWidth = 0;
+    auto DotPosition = Name.find('.');
+    if (DotPosition != StringRef::npos) {
+      const auto &KindRes =
+          parseVectorKind(Name.drop_front(DotPosition), RegKind::Matrix);
+      if (!KindRes) {
+        TokError(
+            "Expected the register to be followed by element width suffix");
+        return MatchOperand_ParseFail;
+      }
+      ElementWidth = KindRes->second;
+    }
     Operands.push_back(AArch64Operand::CreateMatrixRegister(
-        AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
+        AArch64::ZA, ElementWidth, MatrixKind::Array, S, getLoc(),
         getContext()));
     if (getLexer().is(AsmToken::LBrac)) {
       // There's no comma after matrix operand, so we can parse the next operand
@@ -3299,6 +3312,7 @@ static const struct Extension {
     {"sme", {AArch64::FeatureSME}},
     {"sme-f64f64", {AArch64::FeatureSMEF64F64}},
     {"sme-i16i64", {AArch64::FeatureSMEI16I64}},
+    {"sme2", {AArch64::FeatureSME2}},
     {"hbc", {AArch64::FeatureHBC}},
     {"mops", {AArch64::FeatureMOPS}},
     // FIXME: Unsupported extensions
@@ -4212,6 +4226,26 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
   return Error(getLoc(), "expected 'vl' or '#<imm>'");
 }
 
+bool AArch64AsmParser::parseOptionalVGOperand(OperandVector &Operands,
+                                              StringRef &VecGroup) {
+  MCAsmParser &Parser = getParser();
+  auto Tok = Parser.getTok();
+  if (Tok.isNot(AsmToken::Identifier))
+    return true;
+
+  StringRef VG = StringSwitch<StringRef>(Tok.getString().lower())
+                     .Case("vgx2", "vgx2")
+                     .Case("vgx4", "vgx4")
+                     .Default("");
+
+  if (VG.empty())
+    return true;
+
+  VecGroup = VG;
+  Parser.Lex(); // Eat vgx[2|4]
+  return false;
+}
+
 bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
   auto Tok = getTok();
   if (Tok.isNot(AsmToken::Identifier))
@@ -4283,6 +4317,13 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
     return parseOperand(Operands, false, false);
   }
   case AsmToken::Identifier: {
+    // See if this is a "VG" decoration used by SME instructions.
+    StringRef VecGroup;
+    if (!parseOptionalVGOperand(Operands, VecGroup)) {
+      Operands.push_back(
+          AArch64Operand::CreateToken(VecGroup, getLoc(), getContext()));
+      return false;
+    }
     // If we're expecting a Condition Code operand, then just parse that.
     if (isCondCode)
       return parseCondCode(Operands, invertCondCode);
@@ -5465,8 +5506,14 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
     return Error(Loc, "invalid matrix operand, expected za[0-7].d");
   case Match_InvalidMatrix:
     return Error(Loc, "invalid matrix operand, expected za");
+  case Match_InvalidMatrix32:
+    return Error(Loc, "invalid matrix operand, expected suffix .s");
+  case Match_InvalidMatrix64:
+    return Error(Loc, "invalid matrix operand, expected suffix .d");
   case Match_InvalidMatrixIndexGPR32_12_15:
     return Error(Loc, "operand must be a register in range [w12, w15]");
+  case Match_InvalidMatrixIndexGPR32_8_11:
+    return Error(Loc, "operand must be a register in range [w8, w11]");
   default:
     llvm_unreachable("unexpected error code!");
   }
@@ -5989,6 +6036,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidMatrixTile32:
   case Match_InvalidMatrixTile64:
   case Match_InvalidMatrix:
+  case Match_InvalidMatrix32:
+  case Match_InvalidMatrix64:
   case Match_InvalidMatrixTileVectorH8:
   case Match_InvalidMatrixTileVectorH16:
   case Match_InvalidMatrixTileVectorH32:
@@ -6001,6 +6050,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidMatrixTileVectorV128:
   case Match_InvalidSVCR:
   case Match_InvalidMatrixIndexGPR32_12_15:
+  case Match_InvalidMatrixIndexGPR32_8_11:
   case Match_MSR:
   case Match_MRS: {
     if (ErrorInfo >= Operands.size())

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 8a35e970f0f6c..eae84d27d430a 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -69,6 +69,9 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
                                                uint64_t Address,
                                                const MCDisassembler *Decoder);
 static DecodeStatus
+DecodeMatrixIndexGPR32_8_11RegisterClass(MCInst &Inst, unsigned RegNo,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus
 DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, unsigned RegNo,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder);
@@ -510,6 +513,19 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
   return Success;
 }
 
+static DecodeStatus
+DecodeMatrixIndexGPR32_8_11RegisterClass(MCInst &Inst, unsigned RegNo,
+                                         uint64_t Addr, const void *Decoder) {
+  if (RegNo > 3)
+    return Fail;
+
+  unsigned Register =
+      AArch64MCRegisterClasses[AArch64::MatrixIndexGPR32_8_11RegClassID]
+          .getRegister(RegNo);
+  Inst.addOperand(MCOperand::createReg(Register));
+  return Success;
+}
+
 static DecodeStatus
 DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, unsigned RegNo,
                                           uint64_t Addr,

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 47785ef73cced..0006abdbde5f5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -272,7 +272,9 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::rtcGPR64RegClassID:
   case AArch64::WSeqPairsClassRegClassID:
   case AArch64::XSeqPairsClassRegClassID:
+  case AArch64::MatrixIndexGPR32_8_11RegClassID:
   case AArch64::MatrixIndexGPR32_12_15RegClassID:
+  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
   case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
     return getRegBank(AArch64::GPRRegBankID);
   case AArch64::CCRRegClassID:

diff  --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index bd0a497fa4419..cb0705df43bf6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -188,6 +188,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
   uint32_t EncodeMatrixTileListRegisterClass(const MCInst &MI, unsigned OpIdx,
                                              SmallVectorImpl<MCFixup> &Fixups,
                                              const MCSubtargetInfo &STI) const;
+  template <unsigned BaseReg>
   uint32_t encodeMatrixIndexGPR32(const MCInst &MI, unsigned OpIdx,
                                   SmallVectorImpl<MCFixup> &Fixups,
                                   const MCSubtargetInfo &STI) const;
@@ -524,14 +525,13 @@ uint32_t AArch64MCCodeEmitter::EncodeMatrixTileListRegisterClass(
   return RegMask;
 }
 
+template <unsigned BaseReg>
 uint32_t
 AArch64MCCodeEmitter::encodeMatrixIndexGPR32(const MCInst &MI, unsigned OpIdx,
                                              SmallVectorImpl<MCFixup> &Fixups,
                                              const MCSubtargetInfo &STI) const {
   auto RegOpnd = MI.getOperand(OpIdx).getReg();
-  assert(RegOpnd >= AArch64::W12 && RegOpnd <= AArch64::W15 &&
-         "Expected register in the range w12-w15!");
-  return RegOpnd - AArch64::W12;
+  return RegOpnd - BaseReg;
 }
 
 uint32_t

diff  --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 621a8862954b8..655804e3b116e 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1186,3 +1186,71 @@ multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
               (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
   }
 }
+
+//===----------------------------------------------------------------------===//
+// SME2 Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME2 single-multi ternary int/fp, two/four registers
+
+class sme2_mla_add_sub_array_vg24_single<bit vg4, bit sz, bits<2> op,
+                                         MatrixOperand matrix_ty,
+                                         RegisterOperand multi_vector_ty,
+                                         ZPRRegOp zpr_ty,
+                                         string mnemonic>
+   : I<(outs matrix_ty:$ZAd),
+       (ins  matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
+       sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
+       mnemonic,"\t$ZAd[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm",
+       "", []> , Sched<[]> {
+  bits<4> Zm;
+  bits<5> Zn;
+  bits<2> Rv;
+  bits<3> imm3;
+  let Inst{31-23} = 0b110000010;
+  let Inst{22}    = sz;
+  let Inst{21}    = 0b1;
+  let Inst{20}    = vg4;
+  let Inst{19-16} = Zm;
+  let Inst{15}    = 0b0;
+  let Inst{14-13} = Rv;
+  let Inst{12-10} = 0b110;
+  let Inst{9-5}   = Zn;
+  let Inst{4-3}   = op;
+  let Inst{2-0}   = imm3;
+  let Constraints = "$ZAd = $_ZAd";
+}
+
+multiclass sme2_mla_add_sub_array_vg2_single_S<string mnemonic, bits<2> op>{
+  def NAME : sme2_mla_add_sub_array_vg24_single<0b0, 0b0, op, MatrixOp32, ZZ_s,
+                                                ZPR4b32, mnemonic>;
+
+  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
+                 (!cast<Instruction>(NAME) MatrixOp32:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_s:$Zn, ZPR4b32:$Zm), 0>;
+}
+
+multiclass sme2_mla_add_sub_array_vg2_single_D<string mnemonic, bits<2> op>{
+  def NAME : sme2_mla_add_sub_array_vg24_single<0b0, 0b1, op, MatrixOp64,
+                                                ZZ_d, ZPR4b64, mnemonic>;
+
+  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
+                 (!cast<Instruction>(NAME) MatrixOp64:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_d:$Zn, ZPR4b64:$Zm), 0>;
+}
+
+multiclass sme2_mla_add_sub_array_vg4_single_S<string mnemonic, bits<2> op>{
+  def NAME : sme2_mla_add_sub_array_vg24_single<0b1, 0b0, op, MatrixOp32, ZZZZ_s,
+                                                ZPR4b32, mnemonic>;
+
+  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
+                 (!cast<Instruction>(NAME) MatrixOp32:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_s:$Zn, ZPR4b32:$Zm), 0>;
+}
+
+multiclass sme2_mla_add_sub_array_vg4_single_D<string mnemonic, bits<2> op>{
+  def NAME : sme2_mla_add_sub_array_vg24_single<0b1, 0b1, op, MatrixOp64, ZZZZ_d,
+                                                ZPR4b64, mnemonic>;
+
+  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
+                 (!cast<Instruction>(NAME) MatrixOp64:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_d:$Zn, ZPR4b64:$Zm), 0>;
+}
+

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
index 9924e0c3e5924..0f1432c354544 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
@@ -26,7 +26,7 @@ define void @asm_simple_register_clobber() {
 define i64 @asm_register_early_clobber() {
   ; CHECK-LABEL: name: asm_register_early_clobber
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 1572875 /* regdef-ec:GPR64common */, def early-clobber %0, 1572875 /* regdef-ec:GPR64common */, def early-clobber %1, !0
+  ; CHECK:   INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 1703947 /* regdef-ec:GPR64common */, def early-clobber %0, 1703947 /* regdef-ec:GPR64common */, def early-clobber %1, !0
   ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY %0
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY %1
   ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]]
@@ -66,7 +66,7 @@ entry:
 define i64 @test_single_register_output_s64() nounwind ssp {
   ; CHECK-LABEL: name: test_single_register_output_s64
   ; CHECK: bb.1.entry:
-  ; CHECK:   INLINEASM &"mov $0, 7", 0 /* attdialect */, 1572874 /* regdef:GPR64common */, def %0
+  ; CHECK:   INLINEASM &"mov $0, 7", 0 /* attdialect */, 1703946 /* regdef:GPR64common */, def %0
   ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY %0
   ; CHECK:   $x0 = COPY [[COPY]](s64)
   ; CHECK:   RET_ReallyLR implicit $x0
@@ -96,7 +96,7 @@ define float @test_multiple_register_outputs_same() #0 {
 define double @test_multiple_register_outputs_mixed() #0 {
   ; CHECK-LABEL: name: test_multiple_register_outputs_mixed
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0, 1376266 /* regdef:FPR64 */, def %1
+  ; CHECK:   INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0, 1507338 /* regdef:FPR64 */, def %1
   ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY %0
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY %1
   ; CHECK:   $d0 = COPY [[COPY1]](s64)
@@ -123,6 +123,7 @@ define zeroext i8 @test_register_output_trunc(i8* %src) nounwind {
   ; CHECK-LABEL: name: test_register_output_trunc
   ; CHECK: bb.1.entry:
   ; CHECK:   liveins: $x0
+  ; 
   ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
   ; CHECK:   INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %1
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %1
@@ -154,7 +155,7 @@ define void @test_input_register_imm() {
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
   ; CHECK:   [[COPY:%[0-9]+]]:gpr64common = COPY [[C]](s64)
-  ; CHECK:   INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 1572873 /* reguse:GPR64common */, [[COPY]]
+  ; CHECK:   INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:GPR64common */, [[COPY]]
   ; CHECK:   RET_ReallyLR
   call void asm sideeffect "mov x0, $0", "r"(i64 42)
   ret void
@@ -188,7 +189,7 @@ define zeroext i8 @test_input_register(i8* %src) nounwind {
   ; CHECK:   liveins: $x0
   ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
   ; CHECK:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0)
-  ; CHECK:   INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %1, 1572873 /* reguse:GPR64common */, [[COPY1]]
+  ; CHECK:   INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %1, 1703945 /* reguse:GPR64common */, [[COPY1]]
   ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY %1
   ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
   ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
index 5d1e78285bf72..444f791c7b695 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
@@ -69,7 +69,7 @@ define void @test2() #0 personality i32 (...)* @__gcc_personality_v0 {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64common = COPY [[DEF]](p0)
-  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, 1572873 /* reguse:GPR64common */, [[COPY]]
+  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, 1703945 /* reguse:GPR64common */, [[COPY]]
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.a:

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
index 61bacc2f19551..cef2635aacaf2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
@@ -57,11 +57,11 @@ tracksRegLiveness: true
 body:             |
   bb.1:
     ; CHECK-LABEL: name: inlineasm_virt_reg_output
-    ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0
+    ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %0
     ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
     ; CHECK: $w0 = COPY [[COPY]](s32)
     ; CHECK: RET_ReallyLR implicit $w0
-    INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0:gpr32common
+    INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common
     %1:_(s32) = COPY %0
     $w0 = COPY %1(s32)
     RET_ReallyLR implicit $w0
@@ -75,12 +75,12 @@ tracksRegLiveness: true
 body:             |
   bb.1:
     ; CHECK-LABEL: name: inlineasm_virt_mixed_types
-    ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0, 1376266 /* regdef:FPR64 */, def %1
+    ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %0, 2162698 /* regdef:FIXED_REGS */, def %1
     ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1
     ; CHECK: $d0 = COPY [[COPY1]](s64)
     ; CHECK: RET_ReallyLR implicit $d0
-    INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0:gpr32common, 1376266 /* regdef:FPR64 */, def %1:fpr64
+    INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common, 2162698 /* regdef:FPR64 */, def %1:fpr64
     %3:_(s32) = COPY %0
     %4:_(s64) = COPY %1
     $d0 = COPY %4(s64)

diff  --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir
index 8af0e385fc31a..86b8030e5cc33 100644
--- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir
@@ -13,12 +13,12 @@
 # CHECK-LABEL: name: test1
 # CHECK:       bb.0:
 # CHECK-NEXT:     liveins: $x0, $x1
-# PRESERVED:      $x18, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64))
+# PRESERVED:      $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64))
 # NOPRES:         $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64))
-# CHECK-NEXT:     renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64))
+# CHECK:          renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64))
 # CHECK-NEXT:     STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4)
 # CHECK-NEXT:     renamable $x8 = ADDXrr $x8, $x8
-# PRESERVED-NEXT: STPXi renamable $x8, killed $x18, renamable $x0, 10 :: (store (s64), align 4)
+# PRESERVED-NEXT: STRXui renamable $x8, renamable $x0, 10 :: (store (s64), align 4)
 # NOPRES-NEXT:    STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4)
 # CHECK-NEXT:     RET undef $lr
 
@@ -49,16 +49,17 @@ body:             |
 # CHECK:       bb.0:
 # CHECK-NEXT:     liveins: $x0, $x1, $x10, $x11, $x12, $x13
 # CHECK:          renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64))
-# PRESERVED-NEXT: $x18, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64))
-# NOPRES-NEXT:    $x18, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64))
+# PRESERVED-NEXT: renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64))
+# NOPRES-NEXT:    renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64))
+# CHECK-NEXT:     STRXui killed renamable $x9, renamable $x0, 11 :: (store (s64), align 4)
 # CHECK-NEXT:     renamable $x9 = LDRXui renamable $x0, 3 :: (load (s64))
 # CHECK-NEXT:     renamable $x14 = LDRXui renamable $x0, 5 :: (load (s64))
-# PRESERVED-NEXT: STPXi renamable $x9, killed $x18, renamable $x0, 10 :: (store (s64), align 4)
-# NOPRES-NEXT:    STPXi renamable $x9, killed $x18, renamable $x0, 10 :: (store (s64), align 4)
+# PRESERVED-NEXT: STRXui renamable $x9, renamable $x0, 10 :: (store (s64), align 4)
+# NOPRES-NEXT:    STRXui renamable $x9, renamable $x0, 10 :: (store (s64), align 4)
 # CHECK-NEXT:     STRXui killed renamable $x14, renamable $x0, 200 :: (store (s64), align 4)
 # CHECK-NEXT:     renamable $w8 = ADDWrr $w19, $w19
 # CHECK-NEXT:     STRWui renamable $w8, renamable $x0, 100 :: (store (s64), align 4)
-# CHECK-NEXT:     RET undef $lr
+# CHECK-NEXT:    RET undef $lr
 #
 name:            test2
 alignment:       4

diff  --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
index 64d27da22d695..61c0cc505b187 100644
--- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
@@ -364,10 +364,11 @@ body:             |
 # CHECK-NEXT: liveins: $x0, $x1, $x11, $x12, $x13
 
 # CHECK:         renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64))
-# CHECK-NEXT:    $x18, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64))
+# CHECK-NEXT:    renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64))
+# CHECK-NEXT:    STRXui killed renamable $x9, renamable $x0, 11 :: (store (s64), align 4)
 # CHECK-NEXT:    renamable $x9 = LDRXui renamable $x0, 3 :: (load (s64))
 # CHECK-NEXT:    renamable $x14 = LDRXui renamable $x0, 5 :: (load (s64))
-# CHECK-NEXT:    STPXi renamable $x9, killed $x18, renamable $x0, 10 :: (store (s64), align 4)
+# CHECK-NEXT:    STRXui renamable $x9, renamable $x0, 10 :: (store (s64), align 4)
 # CHECK-NEXT:    STRXui killed renamable $x14, renamable $x0, 200 :: (store (s64), align 4)
 # CHECK-NEXT:    renamable $w8 = ADDWrr $w10, $w10
 # CHECK-NEXT:    STRWui renamable $w8, renamable $x0, 100 :: (store (s64), align 4)
@@ -445,11 +446,12 @@ body:             |
 # CHECK-LABEL: name: test13
 # CHECK: bb.0:
 # CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13
-# CHECK:        $x18, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64))
+# CHECK:    renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64))
 # CHECK-NEXT:    renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64))
 # CHECK-NEXT:    STRXui killed renamable $x14, renamable $x0, 100 :: (store (s64), align 4)
+# CHECK-NEXT:    STRXui killed renamable $x9, renamable $x0, 11 :: (store (s64), align 4)
 # CHECK-NEXT:    renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64))
-# CHECK-NEXT:    STPXi renamable $x9, killed $x18, renamable $x0, 10 :: (store (s64), align 4)
+# CHECK-NEXT:    STRXui renamable $x9, renamable $x0, 10 :: (store (s64))
 # CHECK-NEXT:    RET undef $lr
 #
 name:            test13

diff  --git a/llvm/test/MC/AArch64/SME2/add-diagnostics.s b/llvm/test/MC/AArch64/SME2/add-diagnostics.s
new file mode 100644
index 0000000000000..cd7c2f0c66167
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/add-diagnostics.s
@@ -0,0 +1,49 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Out of range index offset
+
+add za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: add za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+add za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: add za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+add za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: add za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+add za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: add za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid Matrix Operand
+
+add za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d
+// CHECK-NEXT: add za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+add za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: add za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+add za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: add za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+

diff  --git a/llvm/test/MC/AArch64/SME2/add.s b/llvm/test/MC/AArch64/SME2/add.s
new file mode 100644
index 0000000000000..d5c2d51c9b4cc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/add.s
@@ -0,0 +1,593 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+add     za.s[w8, 0, vgx2], {z0.s, z1.s}, z0.s  // 11000001, 00100000, 00011000, 00010000
+// CHECK-INST: add     za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x10,0x18,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201810 <unknown>
+
+add     za.s[w8, 0], {z0.s - z1.s}, z0.s  // 11000001-00100000-00011000-00010000
+// CHECK-INST: add     za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x10,0x18,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201810 <unknown>
+
+add     za.s[w10, 5, vgx2], {z10.s, z11.s}, z5.s  // 11000001, 00100101, 01011001, 01010101
+// CHECK-INST: add     za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x55,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1255955 <unknown>
+
+add     za.s[w10, 5], {z10.s - z11.s}, z5.s  // 11000001-00100101-01011001-01010101
+// CHECK-INST: add     za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x55,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1255955 <unknown>
+
+add     za.s[w11, 7, vgx2], {z13.s, z14.s}, z8.s  // 11000001, 00101000, 01111001, 10110111
+// CHECK-INST: add     za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xb7,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879b7 <unknown>
+
+add     za.s[w11, 7], {z13.s - z14.s}, z8.s  // 11000001-00101000-01111001-10110111
+// CHECK-INST: add     za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xb7,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879b7 <unknown>
+
+add     za.s[w11, 7, vgx2], {z31.s, z0.s}, z15.s  // 11000001, 00101111, 01111011, 11110111
+// CHECK-INST: add     za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xf7,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7bf7 <unknown>
+
+add     za.s[w11, 7], {z31.s - z0.s}, z15.s  // 11000001-00101111-01111011-11110111
+// CHECK-INST: add     za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xf7,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7bf7 <unknown>
+
+add     za.s[w8, 5, vgx2], {z17.s, z18.s}, z0.s  // 11000001, 00100000, 00011010, 00110101
+// CHECK-INST: add     za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x35,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a35 <unknown>
+
+add     za.s[w8, 5], {z17.s - z18.s}, z0.s  // 11000001-00100000-00011010-00110101
+// CHECK-INST: add     za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x35,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a35 <unknown>
+
+add     za.s[w8, 1, vgx2], {z1.s, z2.s}, z14.s  // 11000001, 00101110, 00011000, 00110001
+// CHECK-INST: add     za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x31,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1831 <unknown>
+
+add     za.s[w8, 1], {z1.s - z2.s}, z14.s  // 11000001-00101110-00011000-00110001
+// CHECK-INST: add     za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x31,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1831 <unknown>
+
+add     za.s[w10, 0, vgx2], {z19.s, z20.s}, z4.s  // 11000001, 00100100, 01011010, 01110000
+// CHECK-INST: add     za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x70,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a70 <unknown>
+
+add     za.s[w10, 0], {z19.s - z20.s}, z4.s  // 11000001-00100100-01011010-01110000
+// CHECK-INST: add     za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x70,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a70 <unknown>
+
+add     za.s[w8, 0, vgx2], {z12.s, z13.s}, z2.s  // 11000001, 00100010, 00011001, 10010000
+// CHECK-INST: add     za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x90,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221990 <unknown>
+
+add     za.s[w8, 0], {z12.s - z13.s}, z2.s  // 11000001-00100010-00011001-10010000
+// CHECK-INST: add     za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x90,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221990 <unknown>
+
+add     za.s[w10, 1, vgx2], {z1.s, z2.s}, z10.s  // 11000001, 00101010, 01011000, 00110001
+// CHECK-INST: add     za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x31,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5831 <unknown>
+
+add     za.s[w10, 1], {z1.s - z2.s}, z10.s  // 11000001-00101010-01011000-00110001
+// CHECK-INST: add     za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x31,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5831 <unknown>
+
+add     za.s[w8, 5, vgx2], {z22.s, z23.s}, z14.s  // 11000001, 00101110, 00011010, 11010101
+// CHECK-INST: add     za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xd5,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1ad5 <unknown>
+
+add     za.s[w8, 5], {z22.s - z23.s}, z14.s  // 11000001-00101110-00011010-11010101
+// CHECK-INST: add     za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xd5,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1ad5 <unknown>
+
+add     za.s[w11, 2, vgx2], {z9.s, z10.s}, z1.s  // 11000001, 00100001, 01111001, 00110010
+// CHECK-INST: add     za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x32,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1217932 <unknown>
+
+add     za.s[w11, 2], {z9.s - z10.s}, z1.s  // 11000001-00100001-01111001-00110010
+// CHECK-INST: add     za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x32,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1217932 <unknown>
+
+add     za.s[w9, 7, vgx2], {z12.s, z13.s}, z11.s  // 11000001, 00101011, 00111001, 10010111
+// CHECK-INST: add     za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x97,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b3997 <unknown>
+
+add     za.s[w9, 7], {z12.s - z13.s}, z11.s  // 11000001-00101011-00111001-10010111
+// CHECK-INST: add     za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x97,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b3997 <unknown>
+
+
+add     za.d[w8, 0, vgx2], {z0.d, z1.d}, z0.d  // 11000001, 01100000, 00011000, 00010000
+// CHECK-INST: add     za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x10,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601810 <unknown>
+
+add     za.d[w8, 0], {z0.d - z1.d}, z0.d  // 11000001-01100000-00011000-00010000
+// CHECK-INST: add     za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x10,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601810 <unknown>
+
+add     za.d[w10, 5, vgx2], {z10.d, z11.d}, z5.d  // 11000001, 01100101, 01011001, 01010101
+// CHECK-INST: add     za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x55,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1655955 <unknown>
+
+add     za.d[w10, 5], {z10.d - z11.d}, z5.d  // 11000001-01100101-01011001-01010101
+// CHECK-INST: add     za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x55,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1655955 <unknown>
+
+add     za.d[w11, 7, vgx2], {z13.d, z14.d}, z8.d  // 11000001, 01101000, 01111001, 10110111
+// CHECK-INST: add     za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xb7,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879b7 <unknown>
+
+add     za.d[w11, 7], {z13.d - z14.d}, z8.d  // 11000001-01101000-01111001-10110111
+// CHECK-INST: add     za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xb7,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879b7 <unknown>
+
+add     za.d[w11, 7, vgx2], {z31.d, z0.d}, z15.d  // 11000001, 01101111, 01111011, 11110111
+// CHECK-INST: add     za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xf7,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7bf7 <unknown>
+
+add     za.d[w11, 7], {z31.d - z0.d}, z15.d  // 11000001-01101111-01111011-11110111
+// CHECK-INST: add     za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xf7,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7bf7 <unknown>
+
+add     za.d[w8, 5, vgx2], {z17.d, z18.d}, z0.d  // 11000001, 01100000, 00011010, 00110101
+// CHECK-INST: add     za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x35,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a35 <unknown>
+
+add     za.d[w8, 5], {z17.d - z18.d}, z0.d  // 11000001-01100000-00011010-00110101
+// CHECK-INST: add     za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x35,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a35 <unknown>
+
+add     za.d[w8, 1, vgx2], {z1.d, z2.d}, z14.d  // 11000001, 01101110, 00011000, 00110001
+// CHECK-INST: add     za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x31,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1831 <unknown>
+
+add     za.d[w8, 1], {z1.d - z2.d}, z14.d  // 11000001-01101110-00011000-00110001
+// CHECK-INST: add     za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x31,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1831 <unknown>
+
+add     za.d[w10, 0, vgx2], {z19.d, z20.d}, z4.d  // 11000001, 01100100, 01011010, 01110000
+// CHECK-INST: add     za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x70,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a70 <unknown>
+
+add     za.d[w10, 0], {z19.d - z20.d}, z4.d  // 11000001-01100100-01011010-01110000
+// CHECK-INST: add     za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x70,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a70 <unknown>
+
+add     za.d[w8, 0, vgx2], {z12.d, z13.d}, z2.d  // 11000001, 01100010, 00011001, 10010000
+// CHECK-INST: add     za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x90,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621990 <unknown>
+
+add     za.d[w8, 0], {z12.d - z13.d}, z2.d  // 11000001-01100010-00011001-10010000
+// CHECK-INST: add     za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x90,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621990 <unknown>
+
+add     za.d[w10, 1, vgx2], {z1.d, z2.d}, z10.d  // 11000001, 01101010, 01011000, 00110001
+// CHECK-INST: add     za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x31,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5831 <unknown>
+
+add     za.d[w10, 1], {z1.d - z2.d}, z10.d  // 11000001-01101010-01011000-00110001
+// CHECK-INST: add     za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x31,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5831 <unknown>
+
+add     za.d[w8, 5, vgx2], {z22.d, z23.d}, z14.d  // 11000001, 01101110, 00011010, 11010101
+// CHECK-INST: add     za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xd5,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1ad5 <unknown>
+
+add     za.d[w8, 5], {z22.d - z23.d}, z14.d  // 11000001-01101110-00011010-11010101
+// CHECK-INST: add     za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xd5,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1ad5 <unknown>
+
+add     za.d[w11, 2, vgx2], {z9.d, z10.d}, z1.d  // 11000001, 01100001, 01111001, 00110010
+// CHECK-INST: add     za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x32,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1617932 <unknown>
+
+add     za.d[w11, 2], {z9.d - z10.d}, z1.d  // 11000001-01100001-01111001-00110010
+// CHECK-INST: add     za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x32,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1617932 <unknown>
+
+add     za.d[w9, 7, vgx2], {z12.d, z13.d}, z11.d  // 11000001, 01101011, 00111001, 10010111
+// CHECK-INST: add     za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x97,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b3997 <unknown>
+
+add     za.d[w9, 7], {z12.d - z13.d}, z11.d  // 11000001-01101011-00111001-10010111
+// CHECK-INST: add     za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x97,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b3997 <unknown>
+
+
+add     za.s[w8, 0, vgx4], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00010000
+// CHECK-INST: add     za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x10,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301810 <unknown>
+
+add     za.s[w8, 0], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00010000
+// CHECK-INST: add     za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x10,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301810 <unknown>
+
+add     za.s[w10, 5, vgx4], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01010101
+// CHECK-INST: add     za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x55,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1355955 <unknown>
+
+add     za.s[w10, 5], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01010101
+// CHECK-INST: add     za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x55,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1355955 <unknown>
+
+add     za.s[w11, 7, vgx4], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10110111
+// CHECK-INST: add     za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xb7,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879b7 <unknown>
+
+add     za.s[w11, 7], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10110111
+// CHECK-INST: add     za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xb7,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879b7 <unknown>
+
+add     za.s[w11, 7, vgx4], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11110111
+// CHECK-INST: add     za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xf7,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7bf7 <unknown>
+
+add     za.s[w11, 7], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11110111
+// CHECK-INST: add     za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xf7,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7bf7 <unknown>
+
+add     za.s[w8, 5, vgx4], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00110101
+// CHECK-INST: add     za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x35,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a35 <unknown>
+
+add     za.s[w8, 5], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00110101
+// CHECK-INST: add     za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x35,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a35 <unknown>
+
+add     za.s[w8, 1, vgx4], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00110001
+// CHECK-INST: add     za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x31,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1831 <unknown>
+
+add     za.s[w8, 1], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00110001
+// CHECK-INST: add     za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x31,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1831 <unknown>
+
+add     za.s[w10, 0, vgx4], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01110000
+// CHECK-INST: add     za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x70,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a70 <unknown>
+
+add     za.s[w10, 0], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01110000
+// CHECK-INST: add     za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x70,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a70 <unknown>
+
+add     za.s[w8, 0, vgx4], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10010000
+// CHECK-INST: add     za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x90,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321990 <unknown>
+
+add     za.s[w8, 0], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10010000
+// CHECK-INST: add     za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x90,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321990 <unknown>
+
+add     za.s[w10, 1, vgx4], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00110001
+// CHECK-INST: add     za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x31,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5831 <unknown>
+
+add     za.s[w10, 1], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00110001
+// CHECK-INST: add     za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x31,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5831 <unknown>
+
+add     za.s[w8, 5, vgx4], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11010101
+// CHECK-INST: add     za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xd5,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1ad5 <unknown>
+
+add     za.s[w8, 5], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11010101
+// CHECK-INST: add     za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xd5,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1ad5 <unknown>
+
+add     za.s[w11, 2, vgx4], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00110010
+// CHECK-INST: add     za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x32,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1317932 <unknown>
+
+add     za.s[w11, 2], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00110010
+// CHECK-INST: add     za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x32,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1317932 <unknown>
+
+add     za.s[w9, 7, vgx4], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10010111
+// CHECK-INST: add     za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x97,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b3997 <unknown>
+
+add     za.s[w9, 7], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10010111
+// CHECK-INST: add     za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x97,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b3997 <unknown>
+
+
+add     za.d[w8, 0, vgx4], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00010000
+// CHECK-INST: add     za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x10,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701810 <unknown>
+
+add     za.d[w8, 0], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00010000
+// CHECK-INST: add     za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x10,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701810 <unknown>
+
+add     za.d[w10, 5, vgx4], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01010101
+// CHECK-INST: add     za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x55,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1755955 <unknown>
+
+add     za.d[w10, 5], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01010101
+// CHECK-INST: add     za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x55,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1755955 <unknown>
+
+add     za.d[w11, 7, vgx4], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10110111
+// CHECK-INST: add     za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xb7,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879b7 <unknown>
+
+add     za.d[w11, 7], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10110111
+// CHECK-INST: add     za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xb7,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879b7 <unknown>
+
+add     za.d[w11, 7, vgx4], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11110111
+// CHECK-INST: add     za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xf7,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7bf7 <unknown>
+
+add     za.d[w11, 7], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11110111
+// CHECK-INST: add     za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xf7,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7bf7 <unknown>
+
+add     za.d[w8, 5, vgx4], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00110101
+// CHECK-INST: add     za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x35,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a35 <unknown>
+
+add     za.d[w8, 5], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00110101
+// CHECK-INST: add     za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x35,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a35 <unknown>
+
+add     za.d[w8, 1, vgx4], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00110001
+// CHECK-INST: add     za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x31,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1831 <unknown>
+
+add     za.d[w8, 1], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00110001
+// CHECK-INST: add     za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x31,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1831 <unknown>
+
+add     za.d[w10, 0, vgx4], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01110000
+// CHECK-INST: add     za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x70,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a70 <unknown>
+
+add     za.d[w10, 0], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01110000
+// CHECK-INST: add     za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x70,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a70 <unknown>
+
+add     za.d[w8, 0, vgx4], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10010000
+// CHECK-INST: add     za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x90,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721990 <unknown>
+
+add     za.d[w8, 0], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10010000
+// CHECK-INST: add     za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x90,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721990 <unknown>
+
+add     za.d[w10, 1, vgx4], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00110001
+// CHECK-INST: add     za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x31,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5831 <unknown>
+
+add     za.d[w10, 1], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00110001
+// CHECK-INST: add     za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x31,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5831 <unknown>
+
+add     za.d[w8, 5, vgx4], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11010101
+// CHECK-INST: add     za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xd5,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1ad5 <unknown>
+
+add     za.d[w8, 5], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11010101
+// CHECK-INST: add     za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xd5,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1ad5 <unknown>
+
+add     za.d[w11, 2, vgx4], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00110010
+// CHECK-INST: add     za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x32,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1717932 <unknown>
+
+add     za.d[w11, 2], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00110010
+// CHECK-INST: add     za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x32,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1717932 <unknown>
+
+add     za.d[w9, 7, vgx4], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10010111
+// CHECK-INST: add     za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x97,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b3997 <unknown>
+
+add     za.d[w9, 7], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10010111
+// CHECK-INST: add     za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x97,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b3997 <unknown>
+

diff  --git a/llvm/test/MC/AArch64/SME2/directive-arch.s b/llvm/test/MC/AArch64/SME2/directive-arch.s
new file mode 100644
index 0000000000000..cb787288000e8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/directive-arch.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
+
+
+.arch armv9-a+sme2
+add za.s[w8, 7], {z20.s-z21.s}, z10.s
+// CHECK: add	za.s[w8, 7, vgx2], { z20.s, z21.s }, z10.s
+
+.arch armv9-a+nosme2
+

diff  --git a/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s
new file mode 100644
index 0000000000000..83db84e54a1b2
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f64f64 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Out of range index offset
+
+fmla za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fmla za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmla za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fmla za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+fmla za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmla za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmla za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmla za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid Matrix Operand
+
+fmla za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d
+// CHECK-NEXT: fmla za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+fmla za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmla za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+

diff  --git a/llvm/test/MC/AArch64/SME2/fmla.s b/llvm/test/MC/AArch64/SME2/fmla.s
new file mode 100644
index 0000000000000..92d433ef8d161
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/fmla.s
@@ -0,0 +1,593 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-f64f64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-f64f64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+fmla    za.d[w8, 0, vgx2], {z0.d, z1.d}, z0.d  // 11000001, 01100000, 00011000, 00000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x00,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601800 <unknown>
+
+fmla    za.d[w8, 0], {z0.d - z1.d}, z0.d  // 11000001-01100000-00011000-00000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x00,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601800 <unknown>
+
+fmla    za.d[w10, 5, vgx2], {z10.d, z11.d}, z5.d  // 11000001, 01100101, 01011001, 01000101
+// CHECK-INST: fmla    za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x45,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1655945 <unknown>
+
+fmla    za.d[w10, 5], {z10.d - z11.d}, z5.d  // 11000001-01100101-01011001-01000101
+// CHECK-INST: fmla    za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x45,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1655945 <unknown>
+
+fmla    za.d[w11, 7, vgx2], {z13.d, z14.d}, z8.d  // 11000001, 01101000, 01111001, 10100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xa7,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879a7 <unknown>
+
+fmla    za.d[w11, 7], {z13.d - z14.d}, z8.d  // 11000001-01101000-01111001-10100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xa7,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879a7 <unknown>
+
+fmla    za.d[w11, 7, vgx2], {z31.d, z0.d}, z15.d  // 11000001, 01101111, 01111011, 11100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xe7,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7be7 <unknown>
+
+fmla    za.d[w11, 7], {z31.d - z0.d}, z15.d  // 11000001-01101111-01111011-11100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xe7,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7be7 <unknown>
+
+fmla    za.d[w8, 5, vgx2], {z17.d, z18.d}, z0.d  // 11000001, 01100000, 00011010, 00100101
+// CHECK-INST: fmla    za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x25,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a25 <unknown>
+
+fmla    za.d[w8, 5], {z17.d - z18.d}, z0.d  // 11000001-01100000-00011010-00100101
+// CHECK-INST: fmla    za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x25,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a25 <unknown>
+
+fmla    za.d[w8, 1, vgx2], {z1.d, z2.d}, z14.d  // 11000001, 01101110, 00011000, 00100001
+// CHECK-INST: fmla    za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x21,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1821 <unknown>
+
+fmla    za.d[w8, 1], {z1.d - z2.d}, z14.d  // 11000001-01101110-00011000-00100001
+// CHECK-INST: fmla    za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x21,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1821 <unknown>
+
+fmla    za.d[w10, 0, vgx2], {z19.d, z20.d}, z4.d  // 11000001, 01100100, 01011010, 01100000
+// CHECK-INST: fmla    za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x60,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a60 <unknown>
+
+fmla    za.d[w10, 0], {z19.d - z20.d}, z4.d  // 11000001-01100100-01011010-01100000
+// CHECK-INST: fmla    za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x60,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a60 <unknown>
+
+fmla    za.d[w8, 0, vgx2], {z12.d, z13.d}, z2.d  // 11000001, 01100010, 00011001, 10000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x80,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621980 <unknown>
+
+fmla    za.d[w8, 0], {z12.d - z13.d}, z2.d  // 11000001-01100010-00011001-10000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x80,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621980 <unknown>
+
+fmla    za.d[w10, 1, vgx2], {z1.d, z2.d}, z10.d  // 11000001, 01101010, 01011000, 00100001
+// CHECK-INST: fmla    za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x21,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5821 <unknown>
+
+fmla    za.d[w10, 1], {z1.d - z2.d}, z10.d  // 11000001-01101010-01011000-00100001
+// CHECK-INST: fmla    za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x21,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5821 <unknown>
+
+fmla    za.d[w8, 5, vgx2], {z22.d, z23.d}, z14.d  // 11000001, 01101110, 00011010, 11000101
+// CHECK-INST: fmla    za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xc5,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1ac5 <unknown>
+
+fmla    za.d[w8, 5], {z22.d - z23.d}, z14.d  // 11000001-01101110-00011010-11000101
+// CHECK-INST: fmla    za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xc5,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1ac5 <unknown>
+
+fmla    za.d[w11, 2, vgx2], {z9.d, z10.d}, z1.d  // 11000001, 01100001, 01111001, 00100010
+// CHECK-INST: fmla    za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x22,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1617922 <unknown>
+
+fmla    za.d[w11, 2], {z9.d - z10.d}, z1.d  // 11000001-01100001-01111001-00100010
+// CHECK-INST: fmla    za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x22,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1617922 <unknown>
+
+fmla    za.d[w9, 7, vgx2], {z12.d, z13.d}, z11.d  // 11000001, 01101011, 00111001, 10000111
+// CHECK-INST: fmla    za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x87,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b3987 <unknown>
+
+fmla    za.d[w9, 7], {z12.d - z13.d}, z11.d  // 11000001-01101011-00111001-10000111
+// CHECK-INST: fmla    za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x87,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b3987 <unknown>
+
+
+fmla    za.s[w8, 0, vgx2], {z0.s, z1.s}, z0.s  // 11000001, 00100000, 00011000, 00000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x00,0x18,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201800 <unknown>
+
+fmla    za.s[w8, 0], {z0.s - z1.s}, z0.s  // 11000001-00100000-00011000-00000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x00,0x18,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201800 <unknown>
+
+fmla    za.s[w10, 5, vgx2], {z10.s, z11.s}, z5.s  // 11000001, 00100101, 01011001, 01000101
+// CHECK-INST: fmla    za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x45,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1255945 <unknown>
+
+fmla    za.s[w10, 5], {z10.s - z11.s}, z5.s  // 11000001-00100101-01011001-01000101
+// CHECK-INST: fmla    za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x45,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1255945 <unknown>
+
+fmla    za.s[w11, 7, vgx2], {z13.s, z14.s}, z8.s  // 11000001, 00101000, 01111001, 10100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xa7,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879a7 <unknown>
+
+fmla    za.s[w11, 7], {z13.s - z14.s}, z8.s  // 11000001-00101000-01111001-10100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xa7,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879a7 <unknown>
+
+fmla    za.s[w11, 7, vgx2], {z31.s, z0.s}, z15.s  // 11000001, 00101111, 01111011, 11100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xe7,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7be7 <unknown>
+
+fmla    za.s[w11, 7], {z31.s - z0.s}, z15.s  // 11000001-00101111-01111011-11100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xe7,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7be7 <unknown>
+
+fmla    za.s[w8, 5, vgx2], {z17.s, z18.s}, z0.s  // 11000001, 00100000, 00011010, 00100101
+// CHECK-INST: fmla    za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x25,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a25 <unknown>
+
+fmla    za.s[w8, 5], {z17.s - z18.s}, z0.s  // 11000001-00100000-00011010-00100101
+// CHECK-INST: fmla    za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x25,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a25 <unknown>
+
+fmla    za.s[w8, 1, vgx2], {z1.s, z2.s}, z14.s  // 11000001, 00101110, 00011000, 00100001
+// CHECK-INST: fmla    za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x21,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1821 <unknown>
+
+fmla    za.s[w8, 1], {z1.s - z2.s}, z14.s  // 11000001-00101110-00011000-00100001
+// CHECK-INST: fmla    za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x21,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1821 <unknown>
+
+fmla    za.s[w10, 0, vgx2], {z19.s, z20.s}, z4.s  // 11000001, 00100100, 01011010, 01100000
+// CHECK-INST: fmla    za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x60,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a60 <unknown>
+
+fmla    za.s[w10, 0], {z19.s - z20.s}, z4.s  // 11000001-00100100-01011010-01100000
+// CHECK-INST: fmla    za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x60,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a60 <unknown>
+
+fmla    za.s[w8, 0, vgx2], {z12.s, z13.s}, z2.s  // 11000001, 00100010, 00011001, 10000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x80,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221980 <unknown>
+
+fmla    za.s[w8, 0], {z12.s - z13.s}, z2.s  // 11000001-00100010-00011001-10000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x80,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221980 <unknown>
+
+fmla    za.s[w10, 1, vgx2], {z1.s, z2.s}, z10.s  // 11000001, 00101010, 01011000, 00100001
+// CHECK-INST: fmla    za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x21,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5821 <unknown>
+
+fmla    za.s[w10, 1], {z1.s - z2.s}, z10.s  // 11000001-00101010-01011000-00100001
+// CHECK-INST: fmla    za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x21,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5821 <unknown>
+
+fmla    za.s[w8, 5, vgx2], {z22.s, z23.s}, z14.s  // 11000001, 00101110, 00011010, 11000101
+// CHECK-INST: fmla    za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xc5,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1ac5 <unknown>
+
+fmla    za.s[w8, 5], {z22.s - z23.s}, z14.s  // 11000001-00101110-00011010-11000101
+// CHECK-INST: fmla    za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xc5,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1ac5 <unknown>
+
+fmla    za.s[w11, 2, vgx2], {z9.s, z10.s}, z1.s  // 11000001, 00100001, 01111001, 00100010
+// CHECK-INST: fmla    za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x22,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1217922 <unknown>
+
+fmla    za.s[w11, 2], {z9.s - z10.s}, z1.s  // 11000001-00100001-01111001-00100010
+// CHECK-INST: fmla    za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x22,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1217922 <unknown>
+
+fmla    za.s[w9, 7, vgx2], {z12.s, z13.s}, z11.s  // 11000001, 00101011, 00111001, 10000111
+// CHECK-INST: fmla    za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x87,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b3987 <unknown>
+
+fmla    za.s[w9, 7], {z12.s - z13.s}, z11.s  // 11000001-00101011-00111001-10000111
+// CHECK-INST: fmla    za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x87,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b3987 <unknown>
+
+
+fmla    za.d[w8, 0, vgx4], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x00,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701800 <unknown>
+
+fmla    za.d[w8, 0], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x00,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701800 <unknown>
+
+fmla    za.d[w10, 5, vgx4], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01000101
+// CHECK-INST: fmla    za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x45,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1755945 <unknown>
+
+fmla    za.d[w10, 5], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01000101
+// CHECK-INST: fmla    za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x45,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1755945 <unknown>
+
+fmla    za.d[w11, 7, vgx4], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xa7,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879a7 <unknown>
+
+fmla    za.d[w11, 7], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xa7,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879a7 <unknown>
+
+fmla    za.d[w11, 7, vgx4], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xe7,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7be7 <unknown>
+
+fmla    za.d[w11, 7], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11100111
+// CHECK-INST: fmla    za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xe7,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7be7 <unknown>
+
+fmla    za.d[w8, 5, vgx4], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00100101
+// CHECK-INST: fmla    za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x25,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a25 <unknown>
+
+fmla    za.d[w8, 5], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00100101
+// CHECK-INST: fmla    za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x25,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a25 <unknown>
+
+fmla    za.d[w8, 1, vgx4], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00100001
+// CHECK-INST: fmla    za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x21,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1821 <unknown>
+
+fmla    za.d[w8, 1], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00100001
+// CHECK-INST: fmla    za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x21,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1821 <unknown>
+
+fmla    za.d[w10, 0, vgx4], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01100000
+// CHECK-INST: fmla    za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x60,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a60 <unknown>
+
+fmla    za.d[w10, 0], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01100000
+// CHECK-INST: fmla    za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x60,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a60 <unknown>
+
+fmla    za.d[w8, 0, vgx4], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x80,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721980 <unknown>
+
+fmla    za.d[w8, 0], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10000000
+// CHECK-INST: fmla    za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x80,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721980 <unknown>
+
+fmla    za.d[w10, 1, vgx4], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00100001
+// CHECK-INST: fmla    za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x21,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5821 <unknown>
+
+fmla    za.d[w10, 1], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00100001
+// CHECK-INST: fmla    za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x21,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5821 <unknown>
+
+fmla    za.d[w8, 5, vgx4], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11000101
+// CHECK-INST: fmla    za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xc5,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1ac5 <unknown>
+
+fmla    za.d[w8, 5], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11000101
+// CHECK-INST: fmla    za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xc5,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1ac5 <unknown>
+
+fmla    za.d[w11, 2, vgx4], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00100010
+// CHECK-INST: fmla    za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x22,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1717922 <unknown>
+
+fmla    za.d[w11, 2], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00100010
+// CHECK-INST: fmla    za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x22,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1717922 <unknown>
+
+fmla    za.d[w9, 7, vgx4], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10000111
+// CHECK-INST: fmla    za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x87,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b3987 <unknown>
+
+fmla    za.d[w9, 7], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10000111
+// CHECK-INST: fmla    za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x87,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b3987 <unknown>
+
+
+fmla    za.s[w8, 0, vgx4], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x00,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301800 <unknown>
+
+fmla    za.s[w8, 0], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x00,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301800 <unknown>
+
+fmla    za.s[w10, 5, vgx4], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01000101
+// CHECK-INST: fmla    za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x45,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1355945 <unknown>
+
+fmla    za.s[w10, 5], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01000101
+// CHECK-INST: fmla    za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x45,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1355945 <unknown>
+
+fmla    za.s[w11, 7, vgx4], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xa7,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879a7 <unknown>
+
+fmla    za.s[w11, 7], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xa7,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879a7 <unknown>
+
+fmla    za.s[w11, 7, vgx4], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xe7,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7be7 <unknown>
+
+fmla    za.s[w11, 7], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11100111
+// CHECK-INST: fmla    za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xe7,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7be7 <unknown>
+
+fmla    za.s[w8, 5, vgx4], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00100101
+// CHECK-INST: fmla    za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x25,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a25 <unknown>
+
+fmla    za.s[w8, 5], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00100101
+// CHECK-INST: fmla    za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x25,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a25 <unknown>
+
+fmla    za.s[w8, 1, vgx4], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00100001
+// CHECK-INST: fmla    za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x21,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1821 <unknown>
+
+fmla    za.s[w8, 1], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00100001
+// CHECK-INST: fmla    za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x21,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1821 <unknown>
+
+fmla    za.s[w10, 0, vgx4], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01100000
+// CHECK-INST: fmla    za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x60,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a60 <unknown>
+
+fmla    za.s[w10, 0], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01100000
+// CHECK-INST: fmla    za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x60,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a60 <unknown>
+
+fmla    za.s[w8, 0, vgx4], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x80,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321980 <unknown>
+
+fmla    za.s[w8, 0], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10000000
+// CHECK-INST: fmla    za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x80,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321980 <unknown>
+
+fmla    za.s[w10, 1, vgx4], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00100001
+// CHECK-INST: fmla    za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x21,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5821 <unknown>
+
+fmla    za.s[w10, 1], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00100001
+// CHECK-INST: fmla    za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x21,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5821 <unknown>
+
+fmla    za.s[w8, 5, vgx4], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11000101
+// CHECK-INST: fmla    za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xc5,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1ac5 <unknown>
+
+fmla    za.s[w8, 5], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11000101
+// CHECK-INST: fmla    za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xc5,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1ac5 <unknown>
+
+fmla    za.s[w11, 2, vgx4], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00100010
+// CHECK-INST: fmla    za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x22,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1317922 <unknown>
+
+fmla    za.s[w11, 2], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00100010
+// CHECK-INST: fmla    za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x22,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1317922 <unknown>
+
+fmla    za.s[w9, 7, vgx4], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10000111
+// CHECK-INST: fmla    za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x87,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b3987 <unknown>
+
+fmla    za.s[w9, 7], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10000111
+// CHECK-INST: fmla    za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x87,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b3987 <unknown>
+

diff  --git a/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s
new file mode 100644
index 0000000000000..9e762498ce0c1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f64f64 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Out of range index offset
+
+fmls za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fmls za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmls za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: fmls za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+fmls za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmls za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmls za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: fmls za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid Matrix Operand
+
+fmls za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d
+// CHECK-NEXT: fmls za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+fmls za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmls za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+

diff  --git a/llvm/test/MC/AArch64/SME2/fmls.s b/llvm/test/MC/AArch64/SME2/fmls.s
new file mode 100644
index 0000000000000..b3d1f188fdd56
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/fmls.s
@@ -0,0 +1,575 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-f64f64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-f64f64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-f64f64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+fmls    za.d[w8, 0, vgx2], {z0.d, z1.d}, z0.d  // 11000001, 01100000, 00011000, 00001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x08,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601808 <unknown>
+
+fmls    za.d[w8, 0], {z0.d - z1.d}, z0.d  // 11000001-01100000-00011000-00001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x08,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601808 <unknown>
+
+fmls    za.d[w10, 5, vgx2], {z10.d, z11.d}, z5.d  // 11000001, 01100101, 01011001, 01001101
+// CHECK-INST: fmls    za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x4d,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c165594d <unknown>
+
+fmls    za.d[w10, 5], {z10.d - z11.d}, z5.d  // 11000001-01100101-01011001-01001101
+// CHECK-INST: fmls    za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x4d,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c165594d <unknown>
+
+fmls    za.d[w11, 7, vgx2], {z13.d, z14.d}, z8.d  // 11000001, 01101000, 01111001, 10101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xaf,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879af <unknown>
+
+fmls    za.d[w11, 7], {z13.d - z14.d}, z8.d  // 11000001-01101000-01111001-10101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xaf,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879af <unknown>
+
+fmls    za.d[w11, 7, vgx2], {z31.d, z0.d}, z15.d  // 11000001, 01101111, 01111011, 11101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xef,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7bef <unknown>
+
+fmls    za.d[w11, 7], {z31.d - z0.d}, z15.d  // 11000001-01101111-01111011-11101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xef,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7bef <unknown>
+
+fmls    za.d[w8, 5, vgx2], {z17.d, z18.d}, z0.d  // 11000001, 01100000, 00011010, 00101101
+// CHECK-INST: fmls    za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x2d,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a2d <unknown>
+
+fmls    za.d[w8, 5], {z17.d - z18.d}, z0.d  // 11000001-01100000-00011010-00101101
+// CHECK-INST: fmls    za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x2d,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a2d <unknown>
+
+fmls    za.d[w8, 1, vgx2], {z1.d, z2.d}, z14.d  // 11000001, 01101110, 00011000, 00101001
+// CHECK-INST: fmls    za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x29,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1829 <unknown>
+
+fmls    za.d[w8, 1], {z1.d - z2.d}, z14.d  // 11000001-01101110-00011000-00101001
+// CHECK-INST: fmls    za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x29,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1829 <unknown>
+
+fmls    za.d[w10, 0, vgx2], {z19.d, z20.d}, z4.d  // 11000001, 01100100, 01011010, 01101000
+// CHECK-INST: fmls    za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x68,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a68 <unknown>
+
+fmls    za.d[w10, 0], {z19.d - z20.d}, z4.d  // 11000001-01100100-01011010-01101000
+// CHECK-INST: fmls    za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x68,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a68 <unknown>
+
+fmls    za.d[w8, 0, vgx2], {z12.d, z13.d}, z2.d  // 11000001, 01100010, 00011001, 10001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x88,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621988 <unknown>
+
+fmls    za.d[w8, 0], {z12.d - z13.d}, z2.d  // 11000001-01100010-00011001-10001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x88,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621988 <unknown>
+
+fmls    za.d[w10, 1, vgx2], {z1.d, z2.d}, z10.d  // 11000001, 01101010, 01011000, 00101001
+// CHECK-INST: fmls    za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x29,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5829 <unknown>
+
+fmls    za.d[w10, 1], {z1.d - z2.d}, z10.d  // 11000001-01101010-01011000-00101001
+// CHECK-INST: fmls    za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x29,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5829 <unknown>
+
+fmls    za.d[w8, 5, vgx2], {z22.d, z23.d}, z14.d  // 11000001, 01101110, 00011010, 11001101
+// CHECK-INST: fmls    za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xcd,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1acd <unknown>
+
+fmls    za.d[w8, 5], {z22.d - z23.d}, z14.d  // 11000001-01101110-00011010-11001101
+// CHECK-INST: fmls    za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xcd,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1acd <unknown>
+
+fmls    za.d[w11, 2, vgx2], {z9.d, z10.d}, z1.d  // 11000001, 01100001, 01111001, 00101010
+// CHECK-INST: fmls    za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x2a,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c161792a <unknown>
+
+fmls    za.d[w11, 2], {z9.d - z10.d}, z1.d  // 11000001-01100001-01111001-00101010
+// CHECK-INST: fmls    za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x2a,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c161792a <unknown>
+
+fmls    za.d[w9, 7, vgx2], {z12.d, z13.d}, z11.d  // 11000001, 01101011, 00111001, 10001111
+// CHECK-INST: fmls    za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x8f,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b398f <unknown>
+
+fmls    za.d[w9, 7], {z12.d - z13.d}, z11.d  // 11000001-01101011-00111001-10001111
+// CHECK-INST: fmls    za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x8f,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b398f <unknown>
+
+
+fmls    za.s[w10, 5, vgx2], {z10.s, z11.s}, z5.s  // 11000001, 00100101, 01011001, 01001101
+// CHECK-INST: fmls    za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x4d,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c125594d <unknown>
+
+fmls    za.s[w10, 5], {z10.s - z11.s}, z5.s  // 11000001-00100101-01011001-01001101
+// CHECK-INST: fmls    za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x4d,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c125594d <unknown>
+
+fmls    za.s[w11, 7, vgx2], {z13.s, z14.s}, z8.s  // 11000001, 00101000, 01111001, 10101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xaf,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879af <unknown>
+
+fmls    za.s[w11, 7], {z13.s - z14.s}, z8.s  // 11000001-00101000-01111001-10101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xaf,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879af <unknown>
+
+fmls    za.s[w11, 7, vgx2], {z31.s, z0.s}, z15.s  // 11000001, 00101111, 01111011, 11101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xef,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7bef <unknown>
+
+fmls    za.s[w11, 7], {z31.s - z0.s}, z15.s  // 11000001-00101111-01111011-11101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xef,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7bef <unknown>
+
+fmls    za.s[w8, 5, vgx2], {z17.s, z18.s}, z0.s  // 11000001, 00100000, 00011010, 00101101
+// CHECK-INST: fmls    za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x2d,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a2d <unknown>
+
+fmls    za.s[w8, 5], {z17.s - z18.s}, z0.s  // 11000001-00100000-00011010-00101101
+// CHECK-INST: fmls    za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x2d,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a2d <unknown>
+
+fmls    za.s[w8, 1, vgx2], {z1.s, z2.s}, z14.s  // 11000001, 00101110, 00011000, 00101001
+// CHECK-INST: fmls    za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x29,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1829 <unknown>
+
+fmls    za.s[w8, 1], {z1.s - z2.s}, z14.s  // 11000001-00101110-00011000-00101001
+// CHECK-INST: fmls    za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x29,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1829 <unknown>
+
+fmls    za.s[w10, 0, vgx2], {z19.s, z20.s}, z4.s  // 11000001, 00100100, 01011010, 01101000
+// CHECK-INST: fmls    za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x68,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a68 <unknown>
+
+fmls    za.s[w10, 0], {z19.s - z20.s}, z4.s  // 11000001-00100100-01011010-01101000
+// CHECK-INST: fmls    za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x68,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a68 <unknown>
+
+fmls    za.s[w8, 0, vgx2], {z12.s, z13.s}, z2.s  // 11000001, 00100010, 00011001, 10001000
+// CHECK-INST: fmls    za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x88,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221988 <unknown>
+
+fmls    za.s[w8, 0], {z12.s - z13.s}, z2.s  // 11000001-00100010-00011001-10001000
+// CHECK-INST: fmls    za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x88,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221988 <unknown>
+
+fmls    za.s[w10, 1, vgx2], {z1.s, z2.s}, z10.s  // 11000001, 00101010, 01011000, 00101001
+// CHECK-INST: fmls    za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x29,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5829 <unknown>
+
+fmls    za.s[w10, 1], {z1.s - z2.s}, z10.s  // 11000001-00101010-01011000-00101001
+// CHECK-INST: fmls    za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x29,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5829 <unknown>
+
+fmls    za.s[w8, 5, vgx2], {z22.s, z23.s}, z14.s  // 11000001, 00101110, 00011010, 11001101
+// CHECK-INST: fmls    za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xcd,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1acd <unknown>
+
+fmls    za.s[w8, 5], {z22.s - z23.s}, z14.s  // 11000001-00101110-00011010-11001101
+// CHECK-INST: fmls    za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xcd,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1acd <unknown>
+
+fmls    za.s[w11, 2, vgx2], {z9.s, z10.s}, z1.s  // 11000001, 00100001, 01111001, 00101010
+// CHECK-INST: fmls    za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x2a,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c121792a <unknown>
+
+fmls    za.s[w11, 2], {z9.s - z10.s}, z1.s  // 11000001-00100001-01111001-00101010
+// CHECK-INST: fmls    za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x2a,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c121792a <unknown>
+
+fmls    za.s[w9, 7, vgx2], {z12.s, z13.s}, z11.s  // 11000001, 00101011, 00111001, 10001111
+// CHECK-INST: fmls    za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x8f,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b398f <unknown>
+
+fmls    za.s[w9, 7], {z12.s - z13.s}, z11.s  // 11000001-00101011-00111001-10001111
+// CHECK-INST: fmls    za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x8f,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b398f <unknown>
+
+
+fmls    za.d[w8, 0], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x08,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701808 <unknown>
+
+fmls    za.d[w10, 5, vgx4], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01001101
+// CHECK-INST: fmls    za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x4d,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c175594d <unknown>
+
+fmls    za.d[w10, 5], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01001101
+// CHECK-INST: fmls    za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x4d,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c175594d <unknown>
+
+fmls    za.d[w11, 7, vgx4], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xaf,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879af <unknown>
+
+fmls    za.d[w11, 7], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xaf,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879af <unknown>
+
+fmls    za.d[w11, 7, vgx4], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xef,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7bef <unknown>
+
+fmls    za.d[w11, 7], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11101111
+// CHECK-INST: fmls    za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xef,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7bef <unknown>
+
+fmls    za.d[w8, 5, vgx4], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00101101
+// CHECK-INST: fmls    za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x2d,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a2d <unknown>
+
+fmls    za.d[w8, 5], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00101101
+// CHECK-INST: fmls    za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x2d,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a2d <unknown>
+
+fmls    za.d[w8, 1, vgx4], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00101001
+// CHECK-INST: fmls    za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x29,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1829 <unknown>
+
+fmls    za.d[w8, 1], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00101001
+// CHECK-INST: fmls    za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x29,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1829 <unknown>
+
+fmls    za.d[w10, 0, vgx4], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01101000
+// CHECK-INST: fmls    za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x68,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a68 <unknown>
+
+fmls    za.d[w10, 0], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01101000
+// CHECK-INST: fmls    za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x68,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a68 <unknown>
+
+fmls    za.d[w8, 0, vgx4], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x88,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721988 <unknown>
+
+fmls    za.d[w8, 0], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10001000
+// CHECK-INST: fmls    za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x88,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721988 <unknown>
+
+fmls    za.d[w10, 1, vgx4], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00101001
+// CHECK-INST: fmls    za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x29,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5829 <unknown>
+
+fmls    za.d[w10, 1], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00101001
+// CHECK-INST: fmls    za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x29,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5829 <unknown>
+
+fmls    za.d[w8, 5, vgx4], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11001101
+// CHECK-INST: fmls    za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xcd,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1acd <unknown>
+
+fmls    za.d[w8, 5], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11001101
+// CHECK-INST: fmls    za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xcd,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1acd <unknown>
+
+fmls    za.d[w11, 2, vgx4], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00101010
+// CHECK-INST: fmls    za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x2a,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c171792a <unknown>
+
+fmls    za.d[w11, 2], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00101010
+// CHECK-INST: fmls    za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x2a,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c171792a <unknown>
+
+fmls    za.d[w9, 7, vgx4], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10001111
+// CHECK-INST: fmls    za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x8f,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b398f <unknown>
+
+fmls    za.d[w9, 7], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10001111
+// CHECK-INST: fmls    za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x8f,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b398f <unknown>
+
+
+fmls    za.s[w8, 0, vgx4], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00001000
+// CHECK-INST: fmls    za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x08,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301808 <unknown>
+
+fmls    za.s[w8, 0], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00001000
+// CHECK-INST: fmls    za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x08,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301808 <unknown>
+
+fmls    za.s[w10, 5, vgx4], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01001101
+// CHECK-INST: fmls    za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x4d,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c135594d <unknown>
+
+fmls    za.s[w10, 5], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01001101
+// CHECK-INST: fmls    za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x4d,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c135594d <unknown>
+
+fmls    za.s[w11, 7, vgx4], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xaf,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879af <unknown>
+
+fmls    za.s[w11, 7], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xaf,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879af <unknown>
+
+fmls    za.s[w11, 7, vgx4], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xef,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7bef <unknown>
+
+fmls    za.s[w11, 7], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11101111
+// CHECK-INST: fmls    za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xef,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7bef <unknown>
+
+fmls    za.s[w8, 5, vgx4], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00101101
+// CHECK-INST: fmls    za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x2d,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a2d <unknown>
+
+fmls    za.s[w8, 5], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00101101
+// CHECK-INST: fmls    za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x2d,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a2d <unknown>
+
+fmls    za.s[w8, 1, vgx4], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00101001
+// CHECK-INST: fmls    za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x29,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1829 <unknown>
+
+fmls    za.s[w8, 1], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00101001
+// CHECK-INST: fmls    za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x29,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1829 <unknown>
+
+fmls    za.s[w10, 0, vgx4], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01101000
+// CHECK-INST: fmls    za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x68,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a68 <unknown>
+
+fmls    za.s[w10, 0], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01101000
+// CHECK-INST: fmls    za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x68,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a68 <unknown>
+
+fmls    za.s[w8, 0, vgx4], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10001000
+// CHECK-INST: fmls    za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x88,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321988 <unknown>
+
+fmls    za.s[w8, 0], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10001000
+// CHECK-INST: fmls    za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x88,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321988 <unknown>
+
+fmls    za.s[w10, 1, vgx4], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00101001
+// CHECK-INST: fmls    za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x29,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5829 <unknown>
+
+fmls    za.s[w10, 1], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00101001
+// CHECK-INST: fmls    za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x29,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5829 <unknown>
+
+fmls    za.s[w8, 5, vgx4], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11001101
+// CHECK-INST: fmls    za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xcd,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1acd <unknown>
+
+fmls    za.s[w8, 5], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11001101
+// CHECK-INST: fmls    za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xcd,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1acd <unknown>
+
+fmls    za.s[w11, 2, vgx4], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00101010
+// CHECK-INST: fmls    za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x2a,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c131792a <unknown>
+
+fmls    za.s[w11, 2], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00101010
+// CHECK-INST: fmls    za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x2a,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c131792a <unknown>
+
+fmls    za.s[w9, 7, vgx4], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10001111
+// CHECK-INST: fmls    za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x8f,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b398f <unknown>
+
+fmls    za.s[w9, 7], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10001111
+// CHECK-INST: fmls    za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x8f,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b398f <unknown>
+

diff  --git a/llvm/test/MC/AArch64/SME2/sub-diagnostics.s b/llvm/test/MC/AArch64/SME2/sub-diagnostics.s
new file mode 100644
index 0000000000000..2c8dcb325bae7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/sub-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Out of range index offset
+
+sub za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: sub za.s[w8, 8], {z20.s-z21.s}, z10.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sub za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7].
+// CHECK-NEXT: sub za.d[w8, -1, vgx4], {z0.s-z3.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+sub za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: sub za.d[w7, 0], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sub za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11]
+// CHECK-NEXT: sub za.s[w12, 0], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid Matrix Operand
+
+sub za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d
+// CHECK-NEXT: sub za.h[w8, #0], {z0.h-z3.h}, z4.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+sub za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sub za.s[w8, 0, vgx4], {z0.s-z1.s}, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sub za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sub za.d[w8, 0, vgx2], {z0.d-z3.d}, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+

diff  --git a/llvm/test/MC/AArch64/SME2/sub.s b/llvm/test/MC/AArch64/SME2/sub.s
new file mode 100644
index 0000000000000..0ca5c3ba4ccd8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/sub.s
@@ -0,0 +1,593 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+sub     za.s[w8, 0, vgx2], {z0.s, z1.s}, z0.s  // 11000001, 00100000, 00011000, 00011000
+// CHECK-INST: sub     za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x18,0x18,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201818 <unknown>
+
+sub     za.s[w8, 0], {z0.s - z1.s}, z0.s  // 11000001-00100000-00011000-00011000
+// CHECK-INST: sub     za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x18,0x18,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201818 <unknown>
+
+sub     za.s[w10, 5, vgx2], {z10.s, z11.s}, z5.s  // 11000001, 00100101, 01011001, 01011101
+// CHECK-INST: sub     za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x5d,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c125595d <unknown>
+
+sub     za.s[w10, 5], {z10.s - z11.s}, z5.s  // 11000001-00100101-01011001-01011101
+// CHECK-INST: sub     za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s
+// CHECK-ENCODING: [0x5d,0x59,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c125595d <unknown>
+
+sub     za.s[w11, 7, vgx2], {z13.s, z14.s}, z8.s  // 11000001, 00101000, 01111001, 10111111
+// CHECK-INST: sub     za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xbf,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879bf <unknown>
+
+sub     za.s[w11, 7], {z13.s - z14.s}, z8.s  // 11000001-00101000-01111001-10111111
+// CHECK-INST: sub     za.s[w11, 7, vgx2], { z13.s, z14.s }, z8.s
+// CHECK-ENCODING: [0xbf,0x79,0x28,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12879bf <unknown>
+
+sub     za.s[w11, 7, vgx2], {z31.s, z0.s}, z15.s  // 11000001, 00101111, 01111011, 11111111
+// CHECK-INST: sub     za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xff,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7bff <unknown>
+
+sub     za.s[w11, 7], {z31.s - z0.s}, z15.s  // 11000001-00101111-01111011-11111111
+// CHECK-INST: sub     za.s[w11, 7, vgx2], { z31.s, z0.s }, z15.s
+// CHECK-ENCODING: [0xff,0x7b,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12f7bff <unknown>
+
+sub     za.s[w8, 5, vgx2], {z17.s, z18.s}, z0.s  // 11000001, 00100000, 00011010, 00111101
+// CHECK-INST: sub     za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x3d,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a3d <unknown>
+
+sub     za.s[w8, 5], {z17.s - z18.s}, z0.s  // 11000001-00100000-00011010-00111101
+// CHECK-INST: sub     za.s[w8, 5, vgx2], { z17.s, z18.s }, z0.s
+// CHECK-ENCODING: [0x3d,0x1a,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1201a3d <unknown>
+
+sub     za.s[w8, 1, vgx2], {z1.s, z2.s}, z14.s  // 11000001, 00101110, 00011000, 00111001
+// CHECK-INST: sub     za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x39,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1839 <unknown>
+
+sub     za.s[w8, 1], {z1.s - z2.s}, z14.s  // 11000001-00101110-00011000-00111001
+// CHECK-INST: sub     za.s[w8, 1, vgx2], { z1.s, z2.s }, z14.s
+// CHECK-ENCODING: [0x39,0x18,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1839 <unknown>
+
+sub     za.s[w10, 0, vgx2], {z19.s, z20.s}, z4.s  // 11000001, 00100100, 01011010, 01111000
+// CHECK-INST: sub     za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x78,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a78 <unknown>
+
+sub     za.s[w10, 0], {z19.s - z20.s}, z4.s  // 11000001-00100100-01011010-01111000
+// CHECK-INST: sub     za.s[w10, 0, vgx2], { z19.s, z20.s }, z4.s
+// CHECK-ENCODING: [0x78,0x5a,0x24,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1245a78 <unknown>
+
+sub     za.s[w8, 0, vgx2], {z12.s, z13.s}, z2.s  // 11000001, 00100010, 00011001, 10011000
+// CHECK-INST: sub     za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x98,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221998 <unknown>
+
+sub     za.s[w8, 0], {z12.s - z13.s}, z2.s  // 11000001-00100010-00011001-10011000
+// CHECK-INST: sub     za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s
+// CHECK-ENCODING: [0x98,0x19,0x22,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1221998 <unknown>
+
+sub     za.s[w10, 1, vgx2], {z1.s, z2.s}, z10.s  // 11000001, 00101010, 01011000, 00111001
+// CHECK-INST: sub     za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x39,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5839 <unknown>
+
+sub     za.s[w10, 1], {z1.s - z2.s}, z10.s  // 11000001-00101010-01011000-00111001
+// CHECK-INST: sub     za.s[w10, 1, vgx2], { z1.s, z2.s }, z10.s
+// CHECK-ENCODING: [0x39,0x58,0x2a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a5839 <unknown>
+
+sub     za.s[w8, 5, vgx2], {z22.s, z23.s}, z14.s  // 11000001, 00101110, 00011010, 11011101
+// CHECK-INST: sub     za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xdd,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1add <unknown>
+
+sub     za.s[w8, 5], {z22.s - z23.s}, z14.s  // 11000001-00101110-00011010-11011101
+// CHECK-INST: sub     za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s
+// CHECK-ENCODING: [0xdd,0x1a,0x2e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12e1add <unknown>
+
+sub     za.s[w11, 2, vgx2], {z9.s, z10.s}, z1.s  // 11000001, 00100001, 01111001, 00111010
+// CHECK-INST: sub     za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x3a,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c121793a <unknown>
+
+sub     za.s[w11, 2], {z9.s - z10.s}, z1.s  // 11000001-00100001-01111001-00111010
+// CHECK-INST: sub     za.s[w11, 2, vgx2], { z9.s, z10.s }, z1.s
+// CHECK-ENCODING: [0x3a,0x79,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c121793a <unknown>
+
+sub     za.s[w9, 7, vgx2], {z12.s, z13.s}, z11.s  // 11000001, 00101011, 00111001, 10011111
+// CHECK-INST: sub     za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x9f,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b399f <unknown>
+
+sub     za.s[w9, 7], {z12.s - z13.s}, z11.s  // 11000001-00101011-00111001-10011111
+// CHECK-INST: sub     za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s
+// CHECK-ENCODING: [0x9f,0x39,0x2b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12b399f <unknown>
+
+
+sub     za.d[w8, 0, vgx2], {z0.d, z1.d}, z0.d  // 11000001, 01100000, 00011000, 00011000
+// CHECK-INST: sub     za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x18,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601818 <unknown>
+
+sub     za.d[w8, 0], {z0.d - z1.d}, z0.d  // 11000001-01100000-00011000-00011000
+// CHECK-INST: sub     za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x18,0x18,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601818 <unknown>
+
+sub     za.d[w10, 5, vgx2], {z10.d, z11.d}, z5.d  // 11000001, 01100101, 01011001, 01011101
+// CHECK-INST: sub     za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x5d,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c165595d <unknown>
+
+sub     za.d[w10, 5], {z10.d - z11.d}, z5.d  // 11000001-01100101-01011001-01011101
+// CHECK-INST: sub     za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d
+// CHECK-ENCODING: [0x5d,0x59,0x65,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c165595d <unknown>
+
+sub     za.d[w11, 7, vgx2], {z13.d, z14.d}, z8.d  // 11000001, 01101000, 01111001, 10111111
+// CHECK-INST: sub     za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xbf,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879bf <unknown>
+
+sub     za.d[w11, 7], {z13.d - z14.d}, z8.d  // 11000001-01101000-01111001-10111111
+// CHECK-INST: sub     za.d[w11, 7, vgx2], { z13.d, z14.d }, z8.d
+// CHECK-ENCODING: [0xbf,0x79,0x68,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16879bf <unknown>
+
+sub     za.d[w11, 7, vgx2], {z31.d, z0.d}, z15.d  // 11000001, 01101111, 01111011, 11111111
+// CHECK-INST: sub     za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xff,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7bff <unknown>
+
+sub     za.d[w11, 7], {z31.d - z0.d}, z15.d  // 11000001-01101111-01111011-11111111
+// CHECK-INST: sub     za.d[w11, 7, vgx2], { z31.d, z0.d }, z15.d
+// CHECK-ENCODING: [0xff,0x7b,0x6f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16f7bff <unknown>
+
+sub     za.d[w8, 5, vgx2], {z17.d, z18.d}, z0.d  // 11000001, 01100000, 00011010, 00111101
+// CHECK-INST: sub     za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x3d,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a3d <unknown>
+
+sub     za.d[w8, 5], {z17.d - z18.d}, z0.d  // 11000001-01100000-00011010-00111101
+// CHECK-INST: sub     za.d[w8, 5, vgx2], { z17.d, z18.d }, z0.d
+// CHECK-ENCODING: [0x3d,0x1a,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1601a3d <unknown>
+
+sub     za.d[w8, 1, vgx2], {z1.d, z2.d}, z14.d  // 11000001, 01101110, 00011000, 00111001
+// CHECK-INST: sub     za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x39,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1839 <unknown>
+
+sub     za.d[w8, 1], {z1.d - z2.d}, z14.d  // 11000001-01101110-00011000-00111001
+// CHECK-INST: sub     za.d[w8, 1, vgx2], { z1.d, z2.d }, z14.d
+// CHECK-ENCODING: [0x39,0x18,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1839 <unknown>
+
+sub     za.d[w10, 0, vgx2], {z19.d, z20.d}, z4.d  // 11000001, 01100100, 01011010, 01111000
+// CHECK-INST: sub     za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x78,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a78 <unknown>
+
+sub     za.d[w10, 0], {z19.d - z20.d}, z4.d  // 11000001-01100100-01011010-01111000
+// CHECK-INST: sub     za.d[w10, 0, vgx2], { z19.d, z20.d }, z4.d
+// CHECK-ENCODING: [0x78,0x5a,0x64,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1645a78 <unknown>
+
+sub     za.d[w8, 0, vgx2], {z12.d, z13.d}, z2.d  // 11000001, 01100010, 00011001, 10011000
+// CHECK-INST: sub     za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x98,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621998 <unknown>
+
+sub     za.d[w8, 0], {z12.d - z13.d}, z2.d  // 11000001-01100010-00011001-10011000
+// CHECK-INST: sub     za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d
+// CHECK-ENCODING: [0x98,0x19,0x62,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1621998 <unknown>
+
+sub     za.d[w10, 1, vgx2], {z1.d, z2.d}, z10.d  // 11000001, 01101010, 01011000, 00111001
+// CHECK-INST: sub     za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x39,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5839 <unknown>
+
+sub     za.d[w10, 1], {z1.d - z2.d}, z10.d  // 11000001-01101010-01011000-00111001
+// CHECK-INST: sub     za.d[w10, 1, vgx2], { z1.d, z2.d }, z10.d
+// CHECK-ENCODING: [0x39,0x58,0x6a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16a5839 <unknown>
+
+sub     za.d[w8, 5, vgx2], {z22.d, z23.d}, z14.d  // 11000001, 01101110, 00011010, 11011101
+// CHECK-INST: sub     za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xdd,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1add <unknown>
+
+sub     za.d[w8, 5], {z22.d - z23.d}, z14.d  // 11000001-01101110-00011010-11011101
+// CHECK-INST: sub     za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d
+// CHECK-ENCODING: [0xdd,0x1a,0x6e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16e1add <unknown>
+
+sub     za.d[w11, 2, vgx2], {z9.d, z10.d}, z1.d  // 11000001, 01100001, 01111001, 00111010
+// CHECK-INST: sub     za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x3a,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c161793a <unknown>
+
+sub     za.d[w11, 2], {z9.d - z10.d}, z1.d  // 11000001-01100001-01111001-00111010
+// CHECK-INST: sub     za.d[w11, 2, vgx2], { z9.d, z10.d }, z1.d
+// CHECK-ENCODING: [0x3a,0x79,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c161793a <unknown>
+
+sub     za.d[w9, 7, vgx2], {z12.d, z13.d}, z11.d  // 11000001, 01101011, 00111001, 10011111
+// CHECK-INST: sub     za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x9f,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b399f <unknown>
+
+sub     za.d[w9, 7], {z12.d - z13.d}, z11.d  // 11000001-01101011-00111001-10011111
+// CHECK-INST: sub     za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d
+// CHECK-ENCODING: [0x9f,0x39,0x6b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c16b399f <unknown>
+
+
+sub     za.s[w8, 0, vgx4], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00011000
+// CHECK-INST: sub     za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x18,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301818 <unknown>
+
+sub     za.s[w8, 0], {z0.s - z3.s}, z0.s  // 11000001-00110000-00011000-00011000
+// CHECK-INST: sub     za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x18,0x18,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301818 <unknown>
+
+sub     za.s[w10, 5, vgx4], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01011101
+// CHECK-INST: sub     za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x5d,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c135595d <unknown>
+
+sub     za.s[w10, 5], {z10.s - z13.s}, z5.s  // 11000001-00110101-01011001-01011101
+// CHECK-INST: sub     za.s[w10, 5, vgx4], { z10.s - z13.s }, z5.s
+// CHECK-ENCODING: [0x5d,0x59,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c135595d <unknown>
+
+sub     za.s[w11, 7, vgx4], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10111111
+// CHECK-INST: sub     za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xbf,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879bf <unknown>
+
+sub     za.s[w11, 7], {z13.s - z16.s}, z8.s  // 11000001-00111000-01111001-10111111
+// CHECK-INST: sub     za.s[w11, 7, vgx4], { z13.s - z16.s }, z8.s
+// CHECK-ENCODING: [0xbf,0x79,0x38,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13879bf <unknown>
+
+sub     za.s[w11, 7, vgx4], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11111111
+// CHECK-INST: sub     za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xff,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7bff <unknown>
+
+sub     za.s[w11, 7], {z31.s - z2.s}, z15.s  // 11000001-00111111-01111011-11111111
+// CHECK-INST: sub     za.s[w11, 7, vgx4], { z31.s, z0.s, z1.s, z2.s }, z15.s
+// CHECK-ENCODING: [0xff,0x7b,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13f7bff <unknown>
+
+sub     za.s[w8, 5, vgx4], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00111101
+// CHECK-INST: sub     za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x3d,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a3d <unknown>
+
+sub     za.s[w8, 5], {z17.s - z20.s}, z0.s  // 11000001-00110000-00011010-00111101
+// CHECK-INST: sub     za.s[w8, 5, vgx4], { z17.s - z20.s }, z0.s
+// CHECK-ENCODING: [0x3d,0x1a,0x30,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1301a3d <unknown>
+
+sub     za.s[w8, 1, vgx4], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00111001
+// CHECK-INST: sub     za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x39,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1839 <unknown>
+
+sub     za.s[w8, 1], {z1.s - z4.s}, z14.s  // 11000001-00111110-00011000-00111001
+// CHECK-INST: sub     za.s[w8, 1, vgx4], { z1.s - z4.s }, z14.s
+// CHECK-ENCODING: [0x39,0x18,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1839 <unknown>
+
+sub     za.s[w10, 0, vgx4], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01111000
+// CHECK-INST: sub     za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x78,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a78 <unknown>
+
+sub     za.s[w10, 0], {z19.s - z22.s}, z4.s  // 11000001-00110100-01011010-01111000
+// CHECK-INST: sub     za.s[w10, 0, vgx4], { z19.s - z22.s }, z4.s
+// CHECK-ENCODING: [0x78,0x5a,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1345a78 <unknown>
+
+sub     za.s[w8, 0, vgx4], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10011000
+// CHECK-INST: sub     za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x98,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321998 <unknown>
+
+sub     za.s[w8, 0], {z12.s - z15.s}, z2.s  // 11000001-00110010-00011001-10011000
+// CHECK-INST: sub     za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s
+// CHECK-ENCODING: [0x98,0x19,0x32,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1321998 <unknown>
+
+sub     za.s[w10, 1, vgx4], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00111001
+// CHECK-INST: sub     za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x39,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5839 <unknown>
+
+sub     za.s[w10, 1], {z1.s - z4.s}, z10.s  // 11000001-00111010-01011000-00111001
+// CHECK-INST: sub     za.s[w10, 1, vgx4], { z1.s - z4.s }, z10.s
+// CHECK-ENCODING: [0x39,0x58,0x3a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13a5839 <unknown>
+
+sub     za.s[w8, 5, vgx4], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11011101
+// CHECK-INST: sub     za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xdd,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1add <unknown>
+
+sub     za.s[w8, 5], {z22.s - z25.s}, z14.s  // 11000001-00111110-00011010-11011101
+// CHECK-INST: sub     za.s[w8, 5, vgx4], { z22.s - z25.s }, z14.s
+// CHECK-ENCODING: [0xdd,0x1a,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13e1add <unknown>
+
+sub     za.s[w11, 2, vgx4], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00111010
+// CHECK-INST: sub     za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x3a,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c131793a <unknown>
+
+sub     za.s[w11, 2], {z9.s - z12.s}, z1.s  // 11000001-00110001-01111001-00111010
+// CHECK-INST: sub     za.s[w11, 2, vgx4], { z9.s - z12.s }, z1.s
+// CHECK-ENCODING: [0x3a,0x79,0x31,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c131793a <unknown>
+
+sub     za.s[w9, 7, vgx4], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10011111
+// CHECK-INST: sub     za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x9f,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b399f <unknown>
+
+sub     za.s[w9, 7], {z12.s - z15.s}, z11.s  // 11000001-00111011-00111001-10011111
+// CHECK-INST: sub     za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s
+// CHECK-ENCODING: [0x9f,0x39,0x3b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c13b399f <unknown>
+
+
+sub     za.d[w8, 0, vgx4], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00011000
+// CHECK-INST: sub     za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x18,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701818 <unknown>
+
+sub     za.d[w8, 0], {z0.d - z3.d}, z0.d  // 11000001-01110000-00011000-00011000
+// CHECK-INST: sub     za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x18,0x18,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701818 <unknown>
+
+sub     za.d[w10, 5, vgx4], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01011101
+// CHECK-INST: sub     za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x5d,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c175595d <unknown>
+
+sub     za.d[w10, 5], {z10.d - z13.d}, z5.d  // 11000001-01110101-01011001-01011101
+// CHECK-INST: sub     za.d[w10, 5, vgx4], { z10.d - z13.d }, z5.d
+// CHECK-ENCODING: [0x5d,0x59,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c175595d <unknown>
+
+sub     za.d[w11, 7, vgx4], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10111111
+// CHECK-INST: sub     za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xbf,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879bf <unknown>
+
+sub     za.d[w11, 7], {z13.d - z16.d}, z8.d  // 11000001-01111000-01111001-10111111
+// CHECK-INST: sub     za.d[w11, 7, vgx4], { z13.d - z16.d }, z8.d
+// CHECK-ENCODING: [0xbf,0x79,0x78,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17879bf <unknown>
+
+sub     za.d[w11, 7, vgx4], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11111111
+// CHECK-INST: sub     za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xff,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7bff <unknown>
+
+sub     za.d[w11, 7], {z31.d - z2.d}, z15.d  // 11000001-01111111-01111011-11111111
+// CHECK-INST: sub     za.d[w11, 7, vgx4], { z31.d, z0.d, z1.d, z2.d }, z15.d
+// CHECK-ENCODING: [0xff,0x7b,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17f7bff <unknown>
+
+sub     za.d[w8, 5, vgx4], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00111101
+// CHECK-INST: sub     za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x3d,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a3d <unknown>
+
+sub     za.d[w8, 5], {z17.d - z20.d}, z0.d  // 11000001-01110000-00011010-00111101
+// CHECK-INST: sub     za.d[w8, 5, vgx4], { z17.d - z20.d }, z0.d
+// CHECK-ENCODING: [0x3d,0x1a,0x70,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1701a3d <unknown>
+
+sub     za.d[w8, 1, vgx4], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00111001
+// CHECK-INST: sub     za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x39,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1839 <unknown>
+
+sub     za.d[w8, 1], {z1.d - z4.d}, z14.d  // 11000001-01111110-00011000-00111001
+// CHECK-INST: sub     za.d[w8, 1, vgx4], { z1.d - z4.d }, z14.d
+// CHECK-ENCODING: [0x39,0x18,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1839 <unknown>
+
+sub     za.d[w10, 0, vgx4], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01111000
+// CHECK-INST: sub     za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x78,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a78 <unknown>
+
+sub     za.d[w10, 0], {z19.d - z22.d}, z4.d  // 11000001-01110100-01011010-01111000
+// CHECK-INST: sub     za.d[w10, 0, vgx4], { z19.d - z22.d }, z4.d
+// CHECK-ENCODING: [0x78,0x5a,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1745a78 <unknown>
+
+sub     za.d[w8, 0, vgx4], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10011000
+// CHECK-INST: sub     za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x98,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721998 <unknown>
+
+sub     za.d[w8, 0], {z12.d - z15.d}, z2.d  // 11000001-01110010-00011001-10011000
+// CHECK-INST: sub     za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d
+// CHECK-ENCODING: [0x98,0x19,0x72,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1721998 <unknown>
+
+sub     za.d[w10, 1, vgx4], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00111001
+// CHECK-INST: sub     za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x39,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5839 <unknown>
+
+sub     za.d[w10, 1], {z1.d - z4.d}, z10.d  // 11000001-01111010-01011000-00111001
+// CHECK-INST: sub     za.d[w10, 1, vgx4], { z1.d - z4.d }, z10.d
+// CHECK-ENCODING: [0x39,0x58,0x7a,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17a5839 <unknown>
+
+sub     za.d[w8, 5, vgx4], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11011101
+// CHECK-INST: sub     za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xdd,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1add <unknown>
+
+sub     za.d[w8, 5], {z22.d - z25.d}, z14.d  // 11000001-01111110-00011010-11011101
+// CHECK-INST: sub     za.d[w8, 5, vgx4], { z22.d - z25.d }, z14.d
+// CHECK-ENCODING: [0xdd,0x1a,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17e1add <unknown>
+
+sub     za.d[w11, 2, vgx4], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00111010
+// CHECK-INST: sub     za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x3a,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c171793a <unknown>
+
+sub     za.d[w11, 2], {z9.d - z12.d}, z1.d  // 11000001-01110001-01111001-00111010
+// CHECK-INST: sub     za.d[w11, 2, vgx4], { z9.d - z12.d }, z1.d
+// CHECK-ENCODING: [0x3a,0x79,0x71,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c171793a <unknown>
+
+sub     za.d[w9, 7, vgx4], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10011111
+// CHECK-INST: sub     za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x9f,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b399f <unknown>
+
+sub     za.d[w9, 7], {z12.d - z15.d}, z11.d  // 11000001-01111011-00111001-10011111
+// CHECK-INST: sub     za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d
+// CHECK-ENCODING: [0x9f,0x39,0x7b,0xc1]
+// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c17b399f <unknown>
+


        


More information about the llvm-commits mailing list