[llvm] [llvm][AArch64][Assembly]: Add LUT assembly/disassembly. (PR #70802)

Thu Nov 2 07:20:16 PDT 2023

https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/70802

>From 79904dd9bddccef9e42065e075e35b03840020c6 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Tue, 31 Oct 2023 13:23:54 +0000
Subject: [PATCH] [llvm][AArch64][Assembly]: Add LUT assembly/disassembly.

This patch adds the LUT and SME_LUTv2 feature flags, together with assembly/disassembly
support for the following NEON, SVE2 and SME2 instructions:
  * NEON:
    - LUT2
    - LUT4
  * SVE2:
    - LUTI2_ZZZI
    - LUTI4_ZZZI
    - LUTI4_Z2ZZI
  * SME2:
    - MOVT
    - LUTI4_4ZZT2Z
    - LUTI4_S_4ZZT2Z

The instructions are implemented according to the following Arm documentation:
https://developer.arm.com/documentation/ddi0602/2023-09

Change-Id: I10cc46e3fe7efb9f280e26e065b22908d3a0725c
---
 .../llvm/TargetParser/AArch64TargetParser.h   |  4 ++
 llvm/lib/Target/AArch64/AArch64.td            |  5 ++
 .../lib/Target/AArch64/AArch64InstrFormats.td | 48 ++++++++++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 11 ++++
 .../lib/Target/AArch64/AArch64RegisterInfo.td |  4 ++
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  9 +++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  9 +++
 .../AArch64/AsmParser/AArch64AsmParser.cpp    | 12 ++--
 .../MCTargetDesc/AArch64InstPrinter.cpp       |  4 ++
 llvm/lib/Target/AArch64/SMEInstrFormats.td    | 52 +++++++++++++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 52 +++++++++++++++
 .../MC/AArch64/FP8/directive-arch-negative.s  | 18 +++++
 llvm/test/MC/AArch64/FP8/directive-arch.s     | 15 +++++
 llvm/test/MC/AArch64/FP8/luti2-diagnostics.s  | 37 +++++++++++
 llvm/test/MC/AArch64/FP8/luti2.s              | 41 ++++++++++++
 llvm/test/MC/AArch64/FP8/luti4-diagnostics.s  | 50 ++++++++++++++
 llvm/test/MC/AArch64/FP8/luti4.s              | 41 ++++++++++++
 .../MC/AArch64/FP8_SME2/lut-diagnostics.s     | 27 ++++++++
 llvm/test/MC/AArch64/FP8_SME2/lut.s           | 44 +++++++++++++
 .../MC/AArch64/FP8_SME2/movt-diagnostics.s    | 29 +++++++++
 llvm/test/MC/AArch64/FP8_SME2/movt.s          | 31 +++++++++
 .../MC/AArch64/FP8_SVE2/luti2-diagnostics.s   | 37 +++++++++++
 llvm/test/MC/AArch64/FP8_SVE2/luti2.s         | 54 +++++++++++++++
 .../MC/AArch64/FP8_SVE2/luti4-diagnostics.s   | 60 +++++++++++++++++
 llvm/test/MC/AArch64/FP8_SVE2/luti4.s         | 65 +++++++++++++++++++
 .../TargetParser/TargetParserTest.cpp         |  7 +-
 26 files changed, 761 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/FP8/luti2-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8/luti2.s
 create mode 100644 llvm/test/MC/AArch64/FP8/luti4-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8/luti4.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/lut-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/lut.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/movt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SME2/movt.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/luti2-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/luti2.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/luti4-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/FP8_SVE2/luti4.s

diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 80e3fe76e5c2527..48dac9395d58314 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -168,6 +168,8 @@ enum ArchExtKind : unsigned {
   AEK_SSVE_FP8DOT2 =  64, // FEAT_SSVE_FP8DOT2
   AEK_FP8DOT4 =       65, // FEAT_FP8DOT4
   AEK_SSVE_FP8DOT4 =  66, // FEAT_SSVE_FP8DOT4
+  AEK_LUT =           67, // FEAT_LUT
+  AEK_SME_LUTv2 =     68, // FEAT_SME_LUTv2
   AEK_NUM_EXTENSIONS
 };
 using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -285,6 +287,8 @@ inline constexpr ExtensionInfo Extensions[] = {
     {"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+sme2", 0},
     {"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "", 0},
     {"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0},
+    {"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0},
+    {"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0},
     // Special cases
     {"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
 };
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index aa0efb3e6ec13d5..131086e12ce66b1 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -534,6 +534,11 @@ def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",
 
 def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",
   "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;
+def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
+   "Enable Lookup Table instructions (FEAT_LUT)">;
+
+def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
+  "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;
 
 def FeatureAppleA7SysReg  : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
   "Apple A7 (the CPU formerly known as Cyclone)">;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ea965e2933c8dc6..f88f5a240a1fd7f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8119,6 +8119,54 @@ multiclass SIMDTableLookupTied<bit op, string asm> {
                          V128, VecListFour128>;
 }
 
+//----------------------------------------------------------------------------
+// AdvSIMD LUT
+//----------------------------------------------------------------------------
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookupIndexed<bit Q, bits<5> opc, RegisterOperand vectype,
+                            RegisterOperand listtype, Operand idx_type,
+                            string asm, string kind>
+  : I<(outs vectype:$Rd),
+      (ins listtype:$Rn, vectype:$Rm, idx_type:$idx),
+      asm, "\t$Rd" # kind # ", $Rn, $Rm$idx", "", []>,
+    Sched<[]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  let Inst{31}    = 0;
+  let Inst{30}    = Q;
+  let Inst{29-24} = 0b001110;
+  let Inst{23-22} = opc{4-3};
+  let Inst{21}    = 0;
+  let Inst{20-16} = Rm;
+  let Inst{15}    = 0;
+  let Inst{14-12} = opc{2-0};
+  let Inst{11-10} = 0b00;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
+}
+
+multiclass BaseSIMDTableLookupIndexed2<string asm> {
+  def v16f8 : BaseSIMDTableLookupIndexed<0b1, {0b10,?,?,0b1}, V128, VecListOne16b, VectorIndexS, asm, ".16b"> {
+    bits<2> idx;
+    let Inst{14-13} = idx;
+  }
+  def v8f16 : BaseSIMDTableLookupIndexed<0b1, {0b11,?,?,?}, V128, VecListOne8h, VectorIndexH, asm, ".8h" > {
+    bits<3> idx;
+    let Inst{14-12} = idx;
+  }
+}
+
+multiclass BaseSIMDTableLookupIndexed4<string asm> {
+  def v16f8 : BaseSIMDTableLookupIndexed<0b1, {0b01,?,0b10}, V128, VecListOne16b, VectorIndexD, asm, ".16b"> {
+    bit idx;
+    let Inst{14} = idx;
+  }
+  def v8f16 : BaseSIMDTableLookupIndexed<0b1, {0b01,?,?,0b1}, V128, VecListTwo8h, VectorIndexS, asm, ".8h" > {
+    bits<2> idx;
+    let Inst{14-13} = idx;
+  }
+}
 
 //----------------------------------------------------------------------------
 // AdvSIMD scalar DUP
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0125d3dbecf96cb..7b62263e0d55426 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -187,6 +187,10 @@ def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                  AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                            (all_of FeatureSVE2, FeatureFP8DOT4)),
                                  "ssve-fp8dot4 or (sve2 and fp8dot4)">;
+def HasLUT          : Predicate<"Subtarget->hasLUT()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
+def HasSME_LUTv2     : Predicate<"Subtarget->hasSME_LUTv2()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
 
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
@@ -5964,6 +5968,13 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                    (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
           (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
 
+//----------------------------------------------------------------------------
+// AdvSIMD LUT instructions
+//----------------------------------------------------------------------------
+let Predicates = [HasLUT] in {
+  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
+  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
+}
 
 //----------------------------------------------------------------------------
 // AdvSIMD scalar DUP instruction
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 497c143584d48d4..b70ab8568884784 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1268,6 +1268,10 @@ class ZPRVectorListMul<int ElementWidth, int NumRegs> : ZPRVectorList<ElementWid
 
 let EncoderMethod = "EncodeRegAsMultipleOf<2>",
     DecoderMethod = "DecodeZPR2Mul2RegisterClass" in {
+  def ZZ_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,0>"> {
+    let ParserMatchClass = ZPRVectorListMul<0, 2>;
+  }
+
   def ZZ_b_mul_r : RegisterOperand<ZPR2Mul2, "printTypedVectorList<0,'b'>"> {
     let ParserMatchClass = ZPRVectorListMul<8, 2>;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index f55b84b02f85162..7f568c9a225952e 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -885,3 +885,12 @@ defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
 defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
 defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
 } //[HasSME2, HasFAMINMAX]
+
+let Predicates = [HasSME2, HasSME_LUTv2] in {
+defm MOVT : sme2_movt_zt_to_zt<"movt",  0b0011111>;
+def LUTI4_4ZZT2Z    : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
+} //[HasSME2, HasSME_LUTv2]
+
+let Predicates = [HasSME2p1, HasSME_LUTv2] in {
+def LUTI4_S_4ZZT2Z  : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
+} //[HasSME2p1, HasSME_LUTv2]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d186a21f7e7737f..fc6a6a88b4fc084 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4081,3 +4081,12 @@ defm FDOT_ZZZI_BtoS : sve_float_dot_indexed<0b1, 0b01, ZPR8, ZPR3b8, "fdot",
 // FP8 Widening Dot-Product - Group
 defm FDOT_ZZZ_BtoS : sve_float_dot<0b1, 0b1, ZPR32, ZPR8, "fdot", nxv16i8, null_frag>;
 }
+
+let Predicates = [HasSVE2orSME2, HasLUT] in {
+// LUTI2
+  defm LUTI2_ZZZI : sve2_luti2_vector_index<"luti2">;
+// LUTI4
+  defm LUTI4_ZZZI   : sve2_luti4_vector_index<"luti4">;
+// LUTI4 (two contiguous registers)
+  defm LUTI4_Z2ZZI  : sve2_luti4_vector_vg2_index<"luti4">;
+} // End HasSVE2orSME2, HasLUT
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 9c16d22677b9e09..fed7d02a7a5595a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3658,6 +3658,8 @@ static const struct Extension {
     {"ssve-fp8dot2", {AArch64::FeatureSSVE_FP8DOT2}},
     {"fp8dot4", {AArch64::FeatureFP8DOT4}},
     {"ssve-fp8dot4", {AArch64::FeatureSSVE_FP8DOT4}},
+    {"lut", {AArch64::FeatureLUT}},
+    {"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
 };
 
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -4553,7 +4555,7 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
 
   Operands.push_back(AArch64Operand::CreateReg(
       RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext()));
-  Lex(); // Eat identifier token.
+  Lex(); // Eat register.
 
   // Check if register is followed by an index
   if (parseOptionalToken(AsmToken::LBrac)) {
@@ -4565,12 +4567,14 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
     const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
     if (!MCE)
       return TokError("immediate value expected for vector index");
-    if (parseToken(AsmToken::RBrac, "']' expected"))
-      return ParseStatus::Failure;
-
     Operands.push_back(AArch64Operand::CreateImm(
         MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc,
         getLoc(), getContext()));
+    if (parseOptionalToken(AsmToken::Comma)) // e.g. zt0[1, mul vl]
+      if (parseOptionalMulOperand(Operands))
+        return ParseStatus::Failure;
+    if (parseToken(AsmToken::RBrac, "']' expected"))
+      return ParseStatus::Failure;
     Operands.push_back(
         AArch64Operand::CreateToken("]", getLoc(), getContext()));
   }
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 988c78699179f0c..c5de5b4de4aef3a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1740,6 +1740,10 @@ template <unsigned NumLanes, char LaneKind>
 void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
                                               const MCSubtargetInfo &STI,
                                               raw_ostream &O) {
+  if (LaneKind == 0) {
+    printVectorList(MI, OpNum, STI, O, "");
+    return;
+  }
   std::string Suffix(".");
   if (NumLanes)
     Suffix += itostr(NumLanes) + LaneKind;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index d8b44c68fbdee10..ee943c87bc1235e 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3059,6 +3059,25 @@ class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
   let Inst{4-0}   = Rt;
 }
 
+// SME2 move vector to lookup table
+class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
+   : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
+        mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
+        "", []>, Sched<[]> {
+  bits<5> Zt;
+  bits<2> off2;
+  let Inst{31-14} = 0b110000000100111100;
+  let Inst{13-12} = off2;
+  let Inst{11-5}  = opc;
+  let Inst{4-0}   = Zt;
+}
+
+multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
+  def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
+  def : InstAlias<mnemonic # "\t$ZTt, $Zt",
+                 (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
+}
+
 //===----------------------------------------------------------------------===//
 // SME2 lookup table expand one register
 class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
@@ -4713,3 +4732,36 @@ class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
 multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
   def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
 }
+
+// SME2 lookup table two source registers expand to four contiguous destination registers
+class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
+  : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
+       mnemonic, "\t$Zd, $ZTt, $Zn",
+       "", []>, Sched<[]> {
+  bits<4> Zn;
+  bits<3> Zd;
+  let Inst{31-14} = 0b110000001000101100;
+  let Inst{13-12} = sz;
+  let Inst{11-10} = op;
+  let Inst{9-6}   = Zn;
+  let Inst{5}     = 0b0;
+  let Inst{4-2}   = Zd;
+  let Inst{1-0}   = 0b00;
+}
+
+// SME2 lookup table two source registers expand to four non-contiguous destination registers
+class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
+   : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
+        mnemonic, "\t$Zd, $ZTt, $Zn",
+        "", []>, Sched<[]> {
+  bits<4> Zn;
+  bits<3> Zd;
+  let Inst{31-14} = 0b110000001001101100;
+  let Inst{13-12} = sz;
+  let Inst{11-10} = op;
+  let Inst{9-6}   = Zn;
+  let Inst{5}     = 0b0;
+  let Inst{4}     = Zd{2};
+  let Inst{3-2}   = 0b00;
+  let Inst{1-0}   = Zd{1-0};
+}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 78c14379617a24c..d54be1e406fed95 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10203,3 +10203,55 @@ multiclass sve2_fp8_dot_indexed<string mnemonic>{
     let Inst{10} = 0b1;
   }
 }
+
+// FP8 Look up table
+class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
+                            Operand idx_ty, bits<4>opc, string mnemonic>
+    : I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx),
+      mnemonic, "\t$Zd, $Zn, $Zm$idx",
+      "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{22}    = opc{3};
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = opc{2-0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+// FP8 Look up table read with 2-bit indices
+multiclass sve2_luti2_vector_index<string mnemonic> {
+  def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> {
+    bits<2> idx;
+    let Inst{23-22} = idx;
+  }
+  def _H : sve2_lut_vector_index<ZPR16, Z_h, VectorIndexH32b, {?,?,0b10}, mnemonic> {
+    bits<3> idx;
+    let Inst{23-22} = idx{2-1};
+    let Inst{12}    = idx{0};
+  }
+}
+
+// FP8 Look up table read with 4-bit indices
+multiclass sve2_luti4_vector_index<string mnemonic> {
+  def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> {
+    bit idx;
+    let Inst{23} = idx;
+  }
+  def _H : sve2_lut_vector_index<ZPR16, Z_h, VectorIndexS32b, {?, 0b111}, mnemonic> {
+    bits<2> idx;
+    let Inst{23-22} = idx;
+  }
+}
+
+// FP8 Look up table read with 4-bit indices (two contiguous registers)
+multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
+  def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> {
+    bits<2> idx;
+    let Inst{23-22} = idx;
+  }
+}
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
index b4a1110e5bca720..c4d8dbf44b03b68 100644
--- a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -35,3 +35,21 @@ fdot  v31.4h, v0.8b, v0.8b
 fdot  v0.2s, v0.8b, v31.8b
 // CHECK: error: instruction requires: fp8dot4
 // CHECK: fdot  v0.2s, v0.8b, v31.8b
+
+.arch armv9-a+lut
+.arch armv9-a+nolut
+luti2  v30.8h, { v20.8h }, v31[7]
+// CHECK: error: instruction requires: lut
+// CHECK: luti2  v30.8h, { v20.8h }, v31[7]
+
+.arch armv9-a+sve2+lut
+.arch armv9-a+nosve2+nolut
+luti2  z0.h, { z0.h }, z0[0]
+// CHECK: error: instruction requires: lut sve2 or sme2
+// CHECK: luti2  z0.h, { z0.h }, z0[0]
+
+.arch armv9-a+sme-lutv2
+.arch armv9-a+nosme-lutv2
+luti4  { z0.b - z3.b }, zt0, { z0, z1 }
+// CHECK: error: instruction requires: sme2 sme-lutv2
+// CHECK: luti4  { z0.b - z3.b }, zt0, { z0, z1 }
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
index e984210fe3ef13f..b9803b79b58ef93 100644
--- a/llvm/test/MC/AArch64/FP8/directive-arch.s
+++ b/llvm/test/MC/AArch64/FP8/directive-arch.s
@@ -29,3 +29,18 @@ fdot  v31.4h, v0.8b, v0.8b
 fdot  v0.2s, v0.8b, v31.8b
 // CHECK: fdot  v0.2s, v0.8b, v31.8b
 .arch armv9-a+nofp8dot4
+
+.arch armv9-a+lut
+luti2  v30.8h, {v20.8h}, v31[7]
+// CHECK: luti2  v30.8h, { v20.8h }, v31[7]
+.arch armv9-a+nolut
+
+.arch armv9-a+sve2+lut
+luti2  z0.h, {z0.h}, z0[0]
+// CHECK: luti2  z0.h, { z0.h }, z0[0]
+.arch armv9-a+nosve2+nolut
+
+.arch armv9-a+sme2p1+sme-lutv2
+luti4  {z0.b-z3.b}, zt0, {z0-z1}
+// CHECK: luti4  { z0.b - z3.b }, zt0, { z0, z1 }
+.arch armv9-a+nosme2p1+nosme-lutv2
diff --git a/llvm/test/MC/AArch64/FP8/luti2-diagnostics.s b/llvm/test/MC/AArch64/FP8/luti2-diagnostics.s
new file mode 100644
index 000000000000000..1f32fd8b0c027ed
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/luti2-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+lut  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid lane indices
+
+luti2 v2.16b, {v1.16b}, v0[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti2 v2.16b, {v1.16b}, v0[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v3.16b, {v2.16b}, v1[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti2 v3.16b, {v2.16b}, v1[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v30.8h, {v21.8h}, v20[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: luti2 v30.8h, {v21.8h}, v20[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v31.8h, {v31.8h}, v31[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: luti2 v31.8h, {v31.8h}, v31[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector suffix
+
+luti2 v2.8h, {v1.16b}, v0[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti2 v2.8h, {v1.16b}, v0[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 v31.16b, {v31.8h}, v31[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti2 v31.16b, {v31.8h}, v31[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8/luti2.s b/llvm/test/MC/AArch64/FP8/luti2.s
new file mode 100644
index 000000000000000..c5f99f0fb87cb15
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/luti2.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+lut < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=+lut - | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=-lut - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+lut < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+lut -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+luti2   v1.16b, {v2.16b}, v0[0]  // 01001110-10000000-00010000-01000001
+// CHECK-INST: luti2   v1.16b, { v2.16b }, v0[0]
+// CHECK-ENCODING: [0x41,0x10,0x80,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4e801041 <unknown>
+
+luti2   v30.16b, {v20.16b}, v31[3]  // 01001110-10011111-01110010-10011110
+// CHECK-INST: luti2   v30.16b, { v20.16b }, v31[3]
+// CHECK-ENCODING: [0x9e,0x72,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4e9f729e <unknown>
+
+luti2   v1.8h, {v2.8h}, v0[0]  // 01001110-11000000-00000000-01000001
+// CHECK-INST: luti2   v1.8h, { v2.8h }, v0[0]
+// CHECK-ENCODING: [0x41,0x00,0xc0,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4ec00041 <unknown>
+
+luti2   v30.8h, {v20.8h}, v31[7]  // 01001110-11011111-01110010-10011110
+// CHECK-INST: luti2   v30.8h, { v20.8h }, v31[7]
+// CHECK-ENCODING: [0x9e,0x72,0xdf,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4edf729e <unknown>
diff --git a/llvm/test/MC/AArch64/FP8/luti4-diagnostics.s b/llvm/test/MC/AArch64/FP8/luti4-diagnostics.s
new file mode 100644
index 000000000000000..a8b936d87c2fa7b
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/luti4-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+lut  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid lane indices
+
+luti4 v2.16b, {v1.16b}, v0[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
+// CHECK-NEXT: luti4 v2.16b, {v1.16b}, v0[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 v3.16b, {v2.16b}, v1[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
+// CHECK-NEXT: luti4 v3.16b, {v2.16b}, v1[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 v3.8h, {v0.8h, v1.8h}, v2[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti4 v3.8h, {v0.8h, v1.8h}, v2[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 v3.8h, {v0.8h, v1.8h}, v2[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti4 v3.8h, {v0.8h, v1.8h}, v2[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector lists
+
+luti4 v30.8h, {v0.8h, v2.8h}, v3[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 v30.8h, {v0.8h, v2.8h}, v3[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector suffix
+
+luti4 v2.8h, {v1.16b}, v0[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 v2.8h, {v1.16b}, v0[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 v31.8h, {v20.16b, v21.16b}, v31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 v31.8h, {v20.16b, v21.16b}, v31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 v3.s, {v0.8h, v1.8h}, v2[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 v3.s, {v0.8h, v1.8h}, v2[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/FP8/luti4.s b/llvm/test/MC/AArch64/FP8/luti4.s
new file mode 100644
index 000000000000000..77e5b708f619cd3
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/luti4.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+lut < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=+lut - | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=-lut - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+lut < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+lut -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+luti4   v1.16b, {v2.16b}, v0[0]  // 01001110-01000000-00100000-01000001
+// CHECK-INST: luti4   v1.16b, { v2.16b }, v0[0]
+// CHECK-ENCODING: [0x41,0x20,0x40,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4e402041 <unknown>
+
+luti4   v30.16b, {v20.16b}, v31[1]  // 01001110-01011111-01100010-10011110
+// CHECK-INST: luti4   v30.16b, { v20.16b }, v31[1]
+// CHECK-ENCODING: [0x9e,0x62,0x5f,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4e5f629e <unknown>
+
+luti4   v1.8h, {v2.8h, v3.8h}, v0[0]  // 01001110-01000000-00010000-01000001
+// CHECK-INST: luti4   v1.8h, { v2.8h, v3.8h }, v0[0]
+// CHECK-ENCODING: [0x41,0x10,0x40,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4e401041 <unknown>
+
+luti4   v30.8h, {v20.8h, v21.8h}, v31[3]  // 01001110-01011111-01110010-10011110
+// CHECK-INST: luti4   v30.8h, { v20.8h, v21.8h }, v31[3]
+// CHECK-ENCODING: [0x9e,0x72,0x5f,0x4e]
+// CHECK-ERROR: instruction requires: lut
+// CHECK-UNKNOWN: 4e5f729e <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SME2/lut-diagnostics.s b/llvm/test/MC/AArch64/FP8_SME2/lut-diagnostics.s
new file mode 100644
index 000000000000000..36a5b12bc482317
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/lut-diagnostics.s
@@ -0,0 +1,27 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p1,+sme-lutv2  2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid vector select register
+
+luti4   {z0-z3}, zt0, {z0.b-z1.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4   {z0-z3}, zt0, {z0.b-z1.b}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4   {z0.d, z4.d, z8.d, z12.d}, zt0, {z0-z1}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4   {z0.d, z4.d, z8.d, z12.d}, zt0, {z0-z1}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector grouping
+
+luti4   {z0.b-z1.b}, zt0, {z0-z4}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: luti4   {z0.b-z1.b}, zt0, {z0-z4}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4   {z0.b - z12.b}, zt0, {z0-z1}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: luti4   {z0.b - z12.b}, zt0, {z0-z1}
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SME2/lut.s b/llvm/test/MC/AArch64/FP8_SME2/lut.s
new file mode 100644
index 000000000000000..96c88f5da9a6992
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/lut.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme2p1,+sme-lutv2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme2p1,+sme-lutv2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme2p1,+sme-lutv2 --no-print-imm-hex - \
+// RUN:        | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme2p1,+sme-lutv2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme-lutv2 --no-print-imm-hex - \
+// RUN:        | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme2p1,+sme-lutv2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme2p1,+sme-lutv2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+luti4   {z0.b-z3.b}, zt0, {z0-z1}  // 11000000-10001011-00000000-00000000
+// CHECK-INST: luti4   { z0.b - z3.b }, zt0, { z0, z1 }
+// CHECK-ENCODING: [0x00,0x00,0x8b,0xc0]
+// CHECK-ERROR: instruction requires: sme2 sme-lutv2
+// CHECK-UNKNOWN: c08b0000 <unknown>
+
+luti4   {z28.b-z31.b}, zt0, {z30-z31}  // 11000000-10001011-00000011-11011100
+// CHECK-INST: luti4   { z28.b - z31.b }, zt0, { z30, z31 }
+// CHECK-ENCODING: [0xdc,0x03,0x8b,0xc0]
+// CHECK-ERROR: instruction requires: sme2 sme-lutv2
+// CHECK-UNKNOWN: c08b03dc <unknown>
+
+// Strided
+luti4   {z0.b, z4.b, z8.b, z12.b}, zt0, {z0-z1}  // 11000000-10011011-00000000-00000000
+// CHECK-INST: luti4   { z0.b, z4.b, z8.b, z12.b }, zt0, { z0, z1 }
+// CHECK-ENCODING: [0x00,0x00,0x9b,0xc0]
+// CHECK-ERROR: instruction requires: sme2p1 sme-lutv2
+// CHECK-UNKNOWN: c09b0000 <unknown>
+
+luti4   {z19.b, z23.b, z27.b, z31.b}, zt0, {z30-z31}  // 11000000-10011011-00000011-11010011
+// CHECK-INST: luti4   { z19.b, z23.b, z27.b, z31.b }, zt0, { z30, z31 }
+// CHECK-ENCODING: [0xd3,0x03,0x9b,0xc0]
+// CHECK-ERROR: instruction requires: sme2p1 sme-lutv2
+// CHECK-UNKNOWN: c09b03d3 <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SME2/movt-diagnostics.s b/llvm/test/MC/AArch64/FP8_SME2/movt-diagnostics.s
new file mode 100644
index 000000000000000..ba4538921121565
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/movt-diagnostics.s
@@ -0,0 +1,29 @@
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-lutv2  2>&1 < %s | FileCheck %s
+// --------------------------------------------------------------------------//
+// Invalid lookup table register
+movt   z0, z31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid lookup table, expected zt0
+// CHECK-NEXT: movt   z0, z31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+// --------------------------------------------------------------------------//
+// Invalid lookup table offset
+//
+movt    zt0[-1, mul vl], z31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 3].
+// CHECK-NEXT: movt    zt0[-1, mul vl], z31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+movt    zt0[4, mul vl], z31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 3].
+// CHECK-NEXT: movt    zt0[4, mul vl], z31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+// --------------------------------------------------------------------------//
+// Invalid mul vl
+movt  zt0[0, mul vl 3],  z0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: ']' expected
+// CHECK-NEXT: movt  zt0[0, mul vl 3],  z0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+movt  zt0[0, mul #4],  z0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: movt  zt0[0, mul #4],  z0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SME2/movt.s b/llvm/test/MC/AArch64/FP8_SME2/movt.s
new file mode 100644
index 000000000000000..fa6502e138ab65c
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SME2/movt.s
@@ -0,0 +1,31 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-lutv2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-lutv2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sme-lutv2 --no-print-imm-hex - \
+// RUN:        | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-lutv2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme-lutv2 --no-print-imm-hex - \
+// RUN:        | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme2p1,+sme-lutv2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sme2p1,+sme-lutv2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+movt    zt0, z0  // 11000000-01001111-00000011-11100000
+// CHECK-INST: movt    zt0, z0
+// CHECK-ENCODING: [0xe0,0x03,0x4f,0xc0]
+// CHECK-ERROR: instruction requires: sme2 sme-lutv2
+// CHECK-UNKNOWN: c04f03e0 <unknown>
+
+movt    zt0[3, mul vl], z31  // 11000000-01001111-00110011-11111111
+// CHECK-INST: movt    zt0[3, mul vl], z31
+// CHECK-ENCODING: [0xff,0x33,0x4f,0xc0]
+// CHECK-ERROR: instruction requires: sme2 sme-lutv2
+// CHECK-UNKNOWN: c04f33ff <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SVE2/luti2-diagnostics.s b/llvm/test/MC/AArch64/FP8_SVE2/luti2-diagnostics.s
new file mode 100644
index 000000000000000..113d4f3946e3437
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SVE2/luti2-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+lut  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid lane indices
+
+luti2 z2.b, {z1.b}, z0[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti2 z2.b, {z1.b}, z0[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 z3.b, {z2.b}, z1[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti2 z3.b, {z2.b}, z1[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 z30.h, {z21.h}, z20[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: luti2 z30.h, {z21.h}, z20[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 z31.h, {z31.h}, z31[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: luti2 z31.h, {z31.h}, z31[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector suffix
+
+luti2 z2.h, {z1.b}, z0[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti2 z2.h, {z1.b}, z0[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti2 z31.b, {z31.h}, z31[7]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: luti2 z31.b, {z31.h}, z31[7]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SVE2/luti2.s b/llvm/test/MC/AArch64/FP8_SVE2/luti2.s
new file mode 100644
index 000000000000000..86c310c295b5cab
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SVE2/luti2.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+lut < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+lut --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=-lut --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+lut < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+lut -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+luti2   z0.b, {z0.b}, z0[0]  // 01000101-00100000-10110000-00000000
+// CHECK-INST: luti2   z0.b, { z0.b }, z0[0]
+// CHECK-ENCODING: [0x00,0xb0,0x20,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4520b000 <unknown>
+
+
+luti2   z21.b, {z10.b}, z21[1]  // 01000101-01110101-10110001-01010101
+// CHECK-INST: luti2   z21.b, { z10.b }, z21[1]
+// CHECK-ENCODING: [0x55,0xb1,0x75,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4575b155 <unknown>
+
+luti2   z31.b, {z31.b}, z31[3]  // 01000101-11111111-10110011-11111111
+// CHECK-INST: luti2   z31.b, { z31.b }, z31[3]
+// CHECK-ENCODING: [0xff,0xb3,0xff,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 45ffb3ff <unknown>
+
+luti2   z0.h, {z0.h}, z0[0]  // 01000101-00100000-10101000-00000000
+// CHECK-INST: luti2   z0.h, { z0.h }, z0[0]
+// CHECK-ENCODING: [0x00,0xa8,0x20,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4520a800 <unknown>
+
+luti2   z21.h, {z10.h}, z21[3]  // 01000101-01110101-10111001-01010101
+// CHECK-INST: luti2   z21.h, { z10.h }, z21[3]
+// CHECK-ENCODING: [0x55,0xb9,0x75,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4575b955 <unknown>
+
+luti2   z31.h, {z31.h}, z31[7]  // 01000101-11111111-10111011-11111111
+// CHECK-INST: luti2   z31.h, { z31.h }, z31[7]
+// CHECK-ENCODING: [0xff,0xbb,0xff,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 45ffbbff <unknown>
diff --git a/llvm/test/MC/AArch64/FP8_SVE2/luti4-diagnostics.s b/llvm/test/MC/AArch64/FP8_SVE2/luti4-diagnostics.s
new file mode 100644
index 000000000000000..3b44ca00d6789d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SVE2/luti4-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+lut  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid lane indices
+
+luti4 z2.b, {z1.b}, z0[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
+// CHECK-NEXT: luti4 z2.b, {z1.b}, z0[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z3.b, {z2.b}, z1[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
+// CHECK-NEXT: luti4 z3.b, {z2.b}, z1[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z30.h, {z21.h}, z20[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti4 z30.h, {z21.h}, z20[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z31.h, {z31.h}, z31[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti4 z31.h, {z31.h}, z31[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z3.h, {z0.h, z1.h}, z2[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti4 z3.h, {z0.h, z1.h}, z2[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z3.h, {z0.h, z1.h}, z2[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: luti4 z3.h, {z0.h, z1.h}, z2[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector lists
+
+luti4 z30.h, {z0.h, z2.h}, z3[2]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 z30.h, {z0.h, z2.h}, z3[2]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector suffix
+
+luti4 z2.h, {z1.b}, z0[1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 z2.h, {z1.b}, z0[1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z31.h, {z31.b}, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: luti4 z31.h, {z31.b}, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+luti4 z3.s, {z0.h, z1.h}, z2[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: luti4 z3.s, {z0.h, z1.h}, z2[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8_SVE2/luti4.s b/llvm/test/MC/AArch64/FP8_SVE2/luti4.s
new file mode 100644
index 000000000000000..ce5137054a447ab
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8_SVE2/luti4.s
@@ -0,0 +1,65 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+lut < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+lut --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+lut < %s \
+// RUN:        | llvm-objdump -d --mattr=-lut --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+lut < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+lut -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+luti4   z0.b, {z0.b}, z0[0]  // 01000101-01100000-10100100-00000000
+// CHECK-INST: luti4   z0.b, { z0.b }, z0[0]
+// CHECK-ENCODING: [0x00,0xa4,0x60,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4560a400 <unknown>
+
+luti4   z31.b, {z31.b}, z31[1]  // 01000101-11111111-10100111-11111111
+// CHECK-INST: luti4   z31.b, { z31.b }, z31[1]
+// CHECK-ENCODING: [0xff,0xa7,0xff,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 45ffa7ff <unknown>
+
+luti4   z0.h, {z0.h}, z0[0]  // 01000101-00100000-10111100-00000000
+// CHECK-INST: luti4   z0.h, { z0.h }, z0[0]
+// CHECK-ENCODING: [0x00,0xbc,0x20,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4520bc00 <unknown>
+
+luti4   z21.h, {z10.h}, z21[1]  // 01000101-01110101-10111101-01010101
+// CHECK-INST: luti4   z21.h, { z10.h }, z21[1]
+// CHECK-ENCODING: [0x55,0xbd,0x75,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4575bd55 <unknown>
+
+luti4   z31.h, {z31.h}, z31[3]  // 01000101-11111111-10111111-11111111
+// CHECK-INST: luti4   z31.h, { z31.h }, z31[3]
+// CHECK-ENCODING: [0xff,0xbf,0xff,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 45ffbfff <unknown>
+
+luti4   z0.h, {z0.h, z1.h}, z0[0]  // 01000101-00100000-10110100-00000000
+// CHECK-INST: luti4   z0.h, { z0.h, z1.h }, z0[0]
+// CHECK-ENCODING: [0x00,0xb4,0x20,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4520b400 <unknown>
+
+luti4   z21.h, {z10.h, z11.h}, z21[1]  // 01000101-01110101-10110101-01010101
+// CHECK-INST: luti4   z21.h, { z10.h, z11.h }, z21[1]
+// CHECK-ENCODING: [0x55,0xb5,0x75,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 4575b555 <unknown>
+
+luti4   z31.h, {z31.h, z0.h}, z31[3]  // 01000101-11111111-10110111-11111111
+// CHECK-INST: luti4   z31.h, { z31.h, z0.h }, z31[3]
+// CHECK-ENCODING: [0xff,0xb7,0xff,0x45]
+// CHECK-ERROR: instruction requires: lut sve2 or sme2
+// CHECK-UNKNOWN: 45ffb7ff <unknown>
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index d4c5d68cb3ca7cb..16ea62a621d2ae7 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1745,7 +1745,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_FAMINMAX,     AArch64::AEK_FP8FMA,
       AArch64::AEK_SSVE_FP8FMA,  AArch64::AEK_FP8DOT2,
       AArch64::AEK_SSVE_FP8DOT2, AArch64::AEK_FP8DOT4,
-      AArch64::AEK_SSVE_FP8DOT4};
+      AArch64::AEK_SSVE_FP8DOT4, AArch64::AEK_LUT,
+      AArch64::AEK_SME_LUTv2};
 
   std::vector<StringRef> Features;
 
@@ -1826,6 +1827,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4"));
   EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+lut"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sme-lutv2"));
 
   // Assuming we listed every extension above, this should produce the same
   // result. (note that AEK_NONE doesn't have a name so it won't be in the
@@ -1958,6 +1961,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"ssve-fp8dot2", "nossve-fp8dot2", "+ssve-fp8dot2", "-ssve-fp8dot2"},
       {"fp8dot4", "nofp8dot4", "+fp8dot4", "-fp8dot4"},
       {"ssve-fp8dot4", "nossve-fp8dot4", "+ssve-fp8dot4", "-ssve-fp8dot4"},
+      {"lut", "nolut", "+lut", "-lut"},
+      {"sme-lutv2", "nosme-lutv2", "+sme-lutv2", "-sme-lutv2"},
   };
 
   for (unsigned i = 0; i < std::size(ArchExt); i++) {