[llvm] [LLVM][AArch64] Add assembly/disassembly for FTMOPA and BFTMOPA (PR #113230)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 07:19:57 PDT 2024
https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/113230
>From 455b52ccb96a6b27e4826a7e7666d98bbed94c42 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Mon, 21 Oct 2024 22:15:01 +0000
Subject: [PATCH 1/2] [LLVM][AArch64] Add assembly/disassembly for FTMOPA and
BFTMOPA
This patch adds assembly/disassembly for the following SME2p2 instructions (part of the 2024 AArch64 ISA update):
- BFTMOPA (widening) - FEAT_SME2p2
- BFTMOPA (non-widening) - FEAT_SME2p2 & FEAT_SME_B16B16
- FTMOPA (4-way) - FEAT_SME2p2 & FEAT_SME_F8F32
- FTMOPA (2-way, 8-to-16) - FEAT_SME2p2 & FEAT_SME_F8F16
- FTMOPA (2-way, 16-to-32) - FEAT_SME2p2
- FTMOPA (non-widening, f16) - FEAT_SME2p2 & FEAT_SME_F16F16
- FTMOPA (non-widening, f32) - FEAT_SME2p2
- Add new ZPR_K register class and ZK register operand
- Introduce assembler extension tests for the new sme2p2 feature
In accordance with: https://developer.arm.com/documentation/ddi0602/latest/
Co-authored-by: Marian Lukac <marian.lukac at arm.com>
---
.../lib/Target/AArch64/AArch64RegisterInfo.td | 9 +
.../lib/Target/AArch64/AArch64SMEInstrInfo.td | 21 ++
.../AArch64/AsmParser/AArch64AsmParser.cpp | 5 +
.../Disassembler/AArch64Disassembler.cpp | 19 ++
.../MCTargetDesc/AArch64MCCodeEmitter.cpp | 18 ++
llvm/lib/Target/AArch64/SMEInstrFormats.td | 62 +++++
.../AArch64/GlobalISel/regbank-inlineasm.mir | 4 +-
llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll | 14 +-
.../emit_fneg_with_non_register_operand.mir | 4 +-
llvm/test/CodeGen/AArch64/fmlal-loreg.ll | 2 +-
.../CodeGen/AArch64/peephole-insvigpr.mir | 2 +-
.../MC/AArch64/SME2p2/bftmopa-diagnostics.s | 114 ++++++++++
llvm/test/MC/AArch64/SME2p2/bftmopa.s | 53 +++++
.../AArch64/SME2p2/directive-arch-negative.s | 7 +
llvm/test/MC/AArch64/SME2p2/directive-arch.s | 5 +
.../directive-arch_extension-negative.s | 7 +
.../AArch64/SME2p2/directive-arch_extension.s | 5 +
.../AArch64/SME2p2/directive-cpu-negative.s | 7 +
llvm/test/MC/AArch64/SME2p2/directive-cpu.s | 5 +
.../MC/AArch64/SME2p2/ftmopa-diagnostics.s | 212 ++++++++++++++++++
llvm/test/MC/AArch64/SME2p2/ftmopa.s | 113 ++++++++++
21 files changed, 675 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/MC/AArch64/SME2p2/bftmopa-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/bftmopa.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/directive-arch-negative.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/directive-arch.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/directive-arch_extension-negative.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/directive-arch_extension.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/directive-cpu-negative.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/directive-cpu.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/ftmopa-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/SME2p2/ftmopa.s
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 7f629a78fb442c..8516ab2c7dd71c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1210,6 +1210,15 @@ def ZPRMul2AsmOp32_Hi : ZPRAsmOperand<"VectorS_Hi", 32, "Mul2_Hi">;
def ZPRMul2AsmOp64_Lo : ZPRAsmOperand<"VectorD_Lo", 64, "Mul2_Lo">;
def ZPRMul2AsmOp64_Hi : ZPRAsmOperand<"VectorD_Hi", 64, "Mul2_Hi">;
+def ZPR_K : RegisterClass<"AArch64", [untyped], 128,
+ (add Z20, Z21, Z22, Z23, Z28, Z29, Z30, Z31)>;
+
+def ZK : RegisterOperand<ZPR_K, "printSVERegOp<>">{
+ let EncoderMethod = "EncodeZK";
+ let DecoderMethod = "DecodeZK";
+ let ParserMatchClass = ZPRAsmOperand<"Vector_20to23or28to31", 0, "_K">;
+}
+
def ZPR8Mul2_Lo : ZPRMul2_MinToMaxRegOp<"b", ZPRMul2AsmOp8_Lo, 0, 14, ElementSizeB, ZPRMul2_Lo>;
def ZPR8Mul2_Hi : ZPRMul2_MinToMaxRegOp<"b", ZPRMul2AsmOp8_Hi, 16, 30, ElementSizeB, ZPRMul2_Hi>;
def ZPR16Mul2_Lo : ZPRMul2_MinToMaxRegOp<"h", ZPRMul2AsmOp16_Lo, 0, 14, ElementSizeH, ZPRMul2_Lo>;
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 802797a14ee42d..6044b5bb7d8151 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1000,3 +1000,24 @@ defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_fr
} //[HasSMEF8F32]
+let Predicates = [HasSME2p2] in {
+ def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">;
+ def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">;
+ def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">;
+} // [HasSME2p2]
+
+let Predicates = [HasSME2p2, HasSMEB16B16] in {
+ def BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, "bftmopa">;
+} // [HasSME2p2, HasSMEB16B16]
+
+let Predicates = [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR] in {
+ def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">;
+} // [HasSME2p2, HasSMEF8F32], Uses = [FPMR, FPCR]
+
+let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in {
+ def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">;
+} // [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR]
+
+let Predicates = [HasSME2p2, HasSMEF16F16] in {
+ def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
+} // [HasSME2p2, HasSMEF16F16]
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 72b9f252a71878..d0d2fda23a580b 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1264,6 +1264,7 @@ class AArch64Operand : public MCParsedAsmOperand {
case AArch64::ZPR_4bRegClassID:
case AArch64::ZPRMul2_LoRegClassID:
case AArch64::ZPRMul2_HiRegClassID:
+ case AArch64::ZPR_KRegClassID:
RK = RegKind::SVEDataVector;
break;
case AArch64::PPRRegClassID:
@@ -6118,6 +6119,9 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidZPRMul2_Hi64:
return Error(Loc, "Invalid restricted vector register, expected even "
"register in z16.d..z30.d");
+ case Match_InvalidZPR_K0:
+ return Error(Loc, "invalid restricted vector register, expected register "
+ "in z20..z23 or z28..z31");
case Match_InvalidSVEPattern:
return Error(Loc, "invalid predicate pattern");
case Match_InvalidSVEPPRorPNRAnyReg:
@@ -6827,6 +6831,7 @@ bool AArch64AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidZPRMul2_Hi32:
case Match_InvalidZPRMul2_Lo64:
case Match_InvalidZPRMul2_Hi64:
+ case Match_InvalidZPR_K0:
case Match_InvalidSVEVectorList2x8Mul2:
case Match_InvalidSVEVectorList2x16Mul2:
case Match_InvalidSVEVectorList2x32Mul2:
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 87c4245b55357c..f83bf57650062c 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -49,6 +49,8 @@ template <unsigned Min, unsigned Max>
static DecodeStatus DecodeZPRMul2_MinMax(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus DecodeZK(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
template <unsigned Min, unsigned Max>
static DecodeStatus DecodeZPR2Mul2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
@@ -387,6 +389,23 @@ static DecodeStatus DecodeZPR2Mul2RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
+// Zk is the control vector register, one of Z20-Z23 or Z28-Z31, encoded in
+// the "K:Zk" fields: Z20-Z23 = 000, 001, 010, 011 and Z28-Z31 = 100, 101, 110,
+// 111.
+static DecodeStatus DecodeZK(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
+ // RegNo 0-3 selects Z20-Z23 (RegNo + 20).
+ // RegNo 4-7 selects Z28-Z31 (RegNo + 24); larger values fail the check below.
+ unsigned Reg = (RegNo < 4) ? (RegNo + 20) : (RegNo + 24);
+ if (!(Reg >= 20 && Reg <= 23) && !(Reg >= 28 && Reg <= 31))
+ return Fail;
+
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPRRegClassID].getRegister(Reg);
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
+
static DecodeStatus DecodeZPR4Mul4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 61b83847666908..85ffb8639dadf5 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -195,6 +195,9 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
uint32_t EncodeRegMul_MinMax(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint32_t EncodeZK(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
uint32_t EncodePNR_p8to15(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -573,6 +576,21 @@ AArch64MCCodeEmitter::EncodeRegMul_MinMax(const MCInst &MI, unsigned OpIdx,
return (RegVal - Min) / Multiple;
}
+// Zk is the control vector register, one of Z20-Z23 or Z28-Z31, encoded in
+// the "K:Zk" fields: Z20-Z23 = 000, 001, 010, 011 and Z28-Z31 = 100, 101, 110,
+// 111.
+uint32_t AArch64MCCodeEmitter::EncodeZK(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto RegOpnd = MI.getOperand(OpIdx).getReg();
+ unsigned RegVal = Ctx.getRegisterInfo()->getEncodingValue(RegOpnd);
+ // Registers above Z27 encode as RegVal - 24, e.g. Z28 => 28 - 24 = 4.
+ if (RegOpnd > AArch64::Z27)
+ return (RegVal - 24);
+ // Otherwise the operand is expected to be Z20-Z23: RegVal - 20, e.g. Z20 => 0.
+ return (RegVal - 20);
+}
+
uint32_t
AArch64MCCodeEmitter::EncodePNR_p8to15(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 38d256c8234118..08929ed5616b2c 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3192,6 +3192,68 @@ multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator i
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1, nxv4i32>;
}
+//===----------------------------------------------------------------------===//
+// SME2 Sparse Outer Product and Accumulate
+
+class sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
+ : I<(outs TileOp16:$ZAda),
+ (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
+ mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
+ "", []>,
+ Sched<[]> {
+ bit ZAda;
+ bits<4> Zn;
+ bits<5> Zm;
+ bits<3> Zk;
+ bits<2> imm;
+ let Inst{31-25} = 0b1000000;
+ let Inst{24} = opc{4};
+ let Inst{23-22} = 0b01;
+ let Inst{21} = opc{3};
+ let Inst{20-16} = Zm;
+ let Inst{15} = opc{2};
+ let Inst{14} = 0b0;
+ let Inst{13} = opc{1};
+ let Inst{12-10} = Zk;
+ let Inst{9-6} = Zn;
+ let Inst{5-4} = imm;
+ let Inst{3} = opc{0};
+ let Inst{2-1} = 0b00;
+ let Inst{0} = ZAda;
+
+ let Constraints = "$ZAda = $_ZAda";
+}
+
+class sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
+ : I<(outs TileOp32:$ZAda),
+ (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
+ mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
+ "", []>,
+ Sched<[]> {
+ bits<2> ZAda;
+ bits<4> Zn;
+ bits<5> Zm;
+ bits<3> Zk;
+ bits<2> imm;
+ let Inst{31-25} = 0b1000000;
+ let Inst{24} = opc{4};
+ let Inst{23-22} = 0b01;
+ let Inst{21} = opc{3};
+ let Inst{20-16} = Zm;
+ let Inst{15} = opc{2};
+ let Inst{14} = 0b0;
+ let Inst{13} = opc{1};
+ let Inst{12-10} = Zk;
+ let Inst{9-6} = Zn;
+ let Inst{5-4} = imm;
+ let Inst{3} = opc{0};
+ let Inst{2} = 0b0;
+ let Inst{1-0} = ZAda;
+
+ let Constraints = "$ZAda = $_ZAda";
+}
+
+
//===----------------------------------------------------------------------===///
// SME2 Zero Lookup Table.
class sme2_zero_zt<string mnemonic, bits<4> opc>
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
index 7186b3de442bcb..f1d1b691fe1aa5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
@@ -57,7 +57,7 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: inlineasm_virt_reg_output
- ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2490378 /* regdef:FPR32_with_hsub_in_FPR16_lo */, def %0
+ ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2490378 /* regdef:GPR32common */, def %0
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -75,7 +75,7 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: inlineasm_virt_mixed_types
- ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2490378 /* regdef:FPR32_with_hsub_in_FPR16_lo */, def %0, 3342346 /* regdef:GPR64 */, def %1
+ ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2490378 /* regdef:GPR32common */, def %0, 3342346 /* regdef:FPR64 */, def %1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1
; CHECK-NEXT: $d0 = COPY [[COPY1]](s64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
index 2d12c08eb8eee9..9f8897575b3d58 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
@@ -13,7 +13,7 @@ define <vscale x 16 x i8> @test_svadd_i8(<vscale x 16 x i8> %Zn, <vscale x 16 x
; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:zpr_3b = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"add $0.b, $1.b, $2.b", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %2, 5701641 /* reguse:ZPR */, [[COPY2]], 6291465 /* reguse:ZPR_3b */, [[COPY3]]
+ ; CHECK-NEXT: INLINEASM &"add $0.b, $1.b, $2.b", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %2, 5767177 /* reguse:ZPR */, [[COPY2]], 6357001 /* reguse:ZPR_3b */, [[COPY3]]
; CHECK-NEXT: $z0 = COPY %2
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 16 x i8> asm "add $0.b, $1.b, $2.b", "=w,w,y"(<vscale x 16 x i8> %Zn, <vscale x 16 x i8> %Zm)
@@ -29,7 +29,7 @@ define <vscale x 2 x i64> @test_svsub_i64(<vscale x 2 x i64> %Zn, <vscale x 2 x
; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:zpr_4b = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"sub $0.d, $1.d, $2.d", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %2, 5701641 /* reguse:ZPR */, [[COPY2]], 5963785 /* reguse:ZPR_4b */, [[COPY3]]
+ ; CHECK-NEXT: INLINEASM &"sub $0.d, $1.d, $2.d", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %2, 5767177 /* reguse:ZPR */, [[COPY2]], 6029321 /* reguse:ZPR_4b */, [[COPY3]]
; CHECK-NEXT: $z0 = COPY %2
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 2 x i64> asm "sub $0.d, $1.d, $2.d", "=w,w,x"(<vscale x 2 x i64> %Zn, <vscale x 2 x i64> %Zm)
@@ -45,7 +45,7 @@ define <vscale x 8 x half> @test_svfmul_f16(<vscale x 8 x half> %Zn, <vscale x 8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:zpr_3b = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"fmul $0.h, $1.h, $2.h", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %2, 5701641 /* reguse:ZPR */, [[COPY2]], 6291465 /* reguse:ZPR_3b */, [[COPY3]]
+ ; CHECK-NEXT: INLINEASM &"fmul $0.h, $1.h, $2.h", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %2, 5767177 /* reguse:ZPR */, [[COPY2]], 6357001 /* reguse:ZPR_3b */, [[COPY3]]
; CHECK-NEXT: $z0 = COPY %2
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 8 x half> asm "fmul $0.h, $1.h, $2.h", "=w,w,y"(<vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
@@ -61,7 +61,7 @@ define <vscale x 4 x float> @test_svfmul_f(<vscale x 4 x float> %Zn, <vscale x 4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:zpr_4b = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"fmul $0.s, $1.s, $2.s", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %2, 5701641 /* reguse:ZPR */, [[COPY2]], 5963785 /* reguse:ZPR_4b */, [[COPY3]]
+ ; CHECK-NEXT: INLINEASM &"fmul $0.s, $1.s, $2.s", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %2, 5767177 /* reguse:ZPR */, [[COPY2]], 6029321 /* reguse:ZPR_4b */, [[COPY3]]
; CHECK-NEXT: $z0 = COPY %2
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 4 x float> asm "fmul $0.s, $1.s, $2.s", "=w,w,x"(<vscale x 4 x float> %Zn, <vscale x 4 x float> %Zm)
@@ -79,7 +79,7 @@ define <vscale x 8 x half> @test_svfadd_f16(<vscale x 16 x i1> %Pg, <vscale x 8
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr_3b = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:zpr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:zpr = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %3, 655369 /* reguse:PPR_3b */, [[COPY3]], 5701641 /* reguse:ZPR */, [[COPY4]], 5701641 /* reguse:ZPR */, [[COPY5]]
+ ; CHECK-NEXT: INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %3, 720905 /* reguse:PPR_3b */, [[COPY3]], 5767177 /* reguse:ZPR */, [[COPY4]], 5767177 /* reguse:ZPR */, [[COPY5]]
; CHECK-NEXT: $z0 = COPY %3
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w, at 3Upl,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
@@ -95,7 +95,7 @@ define <vscale x 4 x i32> @test_incp(<vscale x 16 x i1> %Pg, <vscale x 4 x i32>
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ppr = COPY $p0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:zpr = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"incp $0.s, $1", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %2, 393225 /* reguse:PPR */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"incp $0.s, $1", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %2, 458761 /* reguse:PPR */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
; CHECK-NEXT: $z0 = COPY %2
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 4 x i32> asm "incp $0.s, $1", "=w, at 3Upa,0"(<vscale x 16 x i1> %Pg, <vscale x 4 x i32> %Zn)
@@ -113,7 +113,7 @@ define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 16 x i1> %P
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr_p8to15 = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:zpr = COPY [[COPY1]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:zpr = COPY [[COPY]]
- ; CHECK-NEXT: INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, 5701642 /* regdef:ZPR */, def %3, 720905 /* reguse:PPR_p8to15 */, [[COPY3]], 5701641 /* reguse:ZPR */, [[COPY4]], 5701641 /* reguse:ZPR */, [[COPY5]]
+ ; CHECK-NEXT: INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %3, 786441 /* reguse:PPR_p8to15 */, [[COPY3]], 5767177 /* reguse:ZPR */, [[COPY4]], 5767177 /* reguse:ZPR */, [[COPY5]]
; CHECK-NEXT: $z0 = COPY %3
; CHECK-NEXT: RET_ReallyLR implicit $z0
%1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w, at 3Uph,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
index 260f816417701b..ffa7453e48b4f0 100644
--- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
+++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
@@ -91,10 +91,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:GPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2
; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:GPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2
; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr
; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv
diff --git a/llvm/test/CodeGen/AArch64/fmlal-loreg.ll b/llvm/test/CodeGen/AArch64/fmlal-loreg.ll
index 20737a73183944..31ead890ba8ac7 100644
--- a/llvm/test/CodeGen/AArch64/fmlal-loreg.ll
+++ b/llvm/test/CodeGen/AArch64/fmlal-loreg.ll
@@ -11,8 +11,8 @@ define <4 x float> @test(ptr %lhs_panel, ptr %rhs_panel, <4 x float> %a) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset b8, -16
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: ldr q8, [x0]
; CHECK-NEXT: ldr q16, [x1]
+; CHECK-NEXT: ldr q8, [x0]
; CHECK-NEXT: lsr x9, x8, #32
; CHECK-NEXT: //APP
; CHECK-NEXT: nop
diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
index 822083bbf8d55e..f8af5b96370178 100644
--- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
@@ -487,7 +487,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]]
- ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3342346 /* regdef:GPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3342346 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]]
; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub
; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
diff --git a/llvm/test/MC/AArch64/SME2p2/bftmopa-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bftmopa-diagnostics.s
new file mode 100644
index 00000000000000..2577a286354c95
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/bftmopa-diagnostics.s
@@ -0,0 +1,114 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid ZA register (range)
+
+bftmopa za2.h, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bftmopa za2.h, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za4.s, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bftmopa za4.s, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid ZA register (type-suffix)
+
+bftmopa za3.d, {z28.h-z29.h}, z31.h, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+// CHECK-NEXT: bftmopa za3.d, {z28.h-z29.h}, z31.h, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector list operand
+
+bftmopa za0.h, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bftmopa za0.h, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za0.h, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: bftmopa za0.h, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za0.s, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bftmopa za0.s, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za3.s, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: bftmopa za3.s, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid ZK register
+
+bftmopa za0.h, {z28.h-z29.h}, z31.h, z19[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z19[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za0.h, {z28.h-z29.h}, z31.h, z24[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z24[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za3.s, {z28.h-z29.h}, z31.h, z19[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.h, z19[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za3.s, {z28.h-z29.h}, z31.h, z27[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.h, z27[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za0.h, {z28.h-z29.h}, z31.h, z21.h[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z21.h[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za0.s, {z28.h-z29.h}, z31.h, z30.h[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: bftmopa za0.s, {z28.h-z29.h}, z31.h, z30.h[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate
+
+bftmopa za0.h, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]
+// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za3.s, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]
+// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid ZPR type suffix
+
+bftmopa za0.h, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bftmopa za0.h, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za0.h, {z28.b-z29.b}, z31.b, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bftmopa za0.h, {z28.b-z29.b}, z31.b, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za3.s, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bftmopa za3.s, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bftmopa za3.s, {z28.s-z29.s}, z31.s, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bftmopa za3.s, {z28.s-z29.s}, z31.s, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SME2p2/bftmopa.s b/llvm/test/MC/AArch64/SME2p2/bftmopa.s
new file mode 100644
index 00000000000000..dc7b5456ddab33
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/bftmopa.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-b16b16 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2p2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-b16b16 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// non-widening
+
+bftmopa za0.h, {z0.h-z1.h}, z0.h, z20[0] // 10000001-01100000-00000000-00001000
+// CHECK-INST: bftmopa za0.h, { z0.h, z1.h }, z0.h, z20[0]
+// CHECK-ENCODING: [0x08,0x00,0x60,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16
+// CHECK-UNKNOWN: 81600008 <unknown>
+
+bftmopa za1.h, {z12.h-z13.h}, z8.h, z23[3] // 10000001-01101000-00001101-10111001
+// CHECK-INST: bftmopa za1.h, { z12.h, z13.h }, z8.h, z23[3]
+// CHECK-ENCODING: [0xb9,0x0d,0x68,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16
+// CHECK-UNKNOWN: 81680db9 <unknown>
+
+bftmopa za1.h, {z30.h-z31.h}, z31.h, z31[3] // 10000001-01111111-00011111-11111001
+// CHECK-INST: bftmopa za1.h, { z30.h, z31.h }, z31.h, z31[3]
+// CHECK-ENCODING: [0xf9,0x1f,0x7f,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16
+// CHECK-UNKNOWN: 817f1ff9 <unknown>
+
+// widening
+
+bftmopa za0.s, {z0.h-z1.h}, z0.h, z20[0] // 10000001-01000000-00000000-00000000
+// CHECK-INST: bftmopa za0.s, { z0.h, z1.h }, z0.h, z20[0]
+// CHECK-ENCODING: [0x00,0x00,0x40,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81400000 <unknown>
+
+bftmopa za3.s, {z12.h-z13.h}, z8.h, z23[3] // 10000001-01001000-00001101-10110011
+// CHECK-INST: bftmopa za3.s, { z12.h, z13.h }, z8.h, z23[3]
+// CHECK-ENCODING: [0xb3,0x0d,0x48,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81480db3 <unknown>
+
+bftmopa za3.s, {z30.h-z31.h}, z31.h, z31[3] // 10000001-01011111-00011111-11110011
+// CHECK-INST: bftmopa za3.s, { z30.h, z31.h }, z31.h, z31[3]
+// CHECK-ENCODING: [0xf3,0x1f,0x5f,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 815f1ff3 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p2/directive-arch-negative.s b/llvm/test/MC/AArch64/SME2p2/directive-arch-negative.s
new file mode 100644
index 00000000000000..27169bac8133e7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/directive-arch-negative.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+sme2p2
+.arch armv9-a+nosme2p2
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
+// CHECK: error: instruction requires: sme2p2
+// CHECK: ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SME2p2/directive-arch.s b/llvm/test/MC/AArch64/SME2p2/directive-arch.s
new file mode 100644
index 00000000000000..bb4e943a88478e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/directive-arch.s
@@ -0,0 +1,5 @@
+// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+sme2p2
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
+// CHECK: ftmopa za0.s, { z0.s, z1.s }, z0.s, z20[0]
diff --git a/llvm/test/MC/AArch64/SME2p2/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/SME2p2/directive-arch_extension-negative.s
new file mode 100644
index 00000000000000..68784db1c6272b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/directive-arch_extension-negative.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch_extension sme2p2
+.arch_extension nosme2p2
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
+// CHECK: error: instruction requires: sme2p2
+// CHECK: ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
diff --git a/llvm/test/MC/AArch64/SME2p2/directive-arch_extension.s b/llvm/test/MC/AArch64/SME2p2/directive-arch_extension.s
new file mode 100644
index 00000000000000..2e281e13f8ddb9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/directive-arch_extension.s
@@ -0,0 +1,5 @@
+// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch_extension sme2p2
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
+// CHECK: ftmopa za0.s, { z0.s, z1.s }, z0.s, z20[0]
diff --git a/llvm/test/MC/AArch64/SME2p2/directive-cpu-negative.s b/llvm/test/MC/AArch64/SME2p2/directive-cpu-negative.s
new file mode 100644
index 00000000000000..7cc829fbddd6ac
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/directive-cpu-negative.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.cpu generic+sme2p2
+.cpu generic+nosme2p2
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
+// CHECK: error: instruction requires: sme2p2
+// CHECK: ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
diff --git a/llvm/test/MC/AArch64/SME2p2/directive-cpu.s b/llvm/test/MC/AArch64/SME2p2/directive-cpu.s
new file mode 100644
index 00000000000000..152da398a90654
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/directive-cpu.s
@@ -0,0 +1,5 @@
+// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.cpu generic+sme2p2
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0]
+// CHECK: ftmopa za0.s, { z0.s, z1.s }, z0.s, z20[0]
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SME2p2/ftmopa-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/ftmopa-diagnostics.s
new file mode 100644
index 00000000000000..e1f328819a9167
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/ftmopa-diagnostics.s
@@ -0,0 +1,212 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid ZA register (range)
+
+ftmopa za2.h, {z30.b-z31.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za2.h, {z30.b-z31.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za2.h, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za2.h, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za4.s, {z30.b-z31.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za4.s, {z30.b-z31.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za4.s, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za4.s, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za4.s, {z30.s-z31.s}, z31.s, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za4.s, {z30.s-z31.s}, z31.s, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid ZA register (type suffix)
+
+ftmopa za0.b, {z30.b-z31.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+// CHECK-NEXT: ftmopa za0.b, {z30.b-z31.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.d, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+// CHECK-NEXT: ftmopa za0.d, {z30.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z30.s-z31.s}, z31.s, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za0.h, {z30.s-z31.s}, z31.s, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid vector list operand
+
+ftmopa za0.h, {z28.b-z31.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za0.h, {z28.b-z31.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z29.b-z30.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: ftmopa za0.h, {z29.b-z30.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za0.h, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: ftmopa za0.h, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.b-z31.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za3.s, {z28.b-z31.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z29.b-z30.b}, z31.b, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: ftmopa za3.s, {z29.b-z30.b}, z31.b, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za3.s, {z28.h-z31.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: ftmopa za3.s, {z29.h-z30.h}, z31.h, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.s-z31.s}, z31.s, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ftmopa za3.s, {z28.s-z31.s}, z31.s, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z29.s-z30.s}, z31.s, z31[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: ftmopa za3.s, {z29.s-z30.s}, z31.s, z31[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid ZK register
+
+ftmopa za0.h, {z28.b-z29.b}, z31.b, z27[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za0.h, {z28.b-z29.b}, z31.b, z27[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z28.b-z29.b}, z31.b, z21.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za0.h, {z28.b-z29.b}, z31.b, z21.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z28.h-z29.h}, z31.h, z19[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za0.h, {z28.h-z29.h}, z31.h, z19[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z28.h-z29.h}, z31.h, z30.h[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za0.h, {z28.h-z29.h}, z31.h, z30.h[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.b-z29.b}, z31.b, z27[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za3.s, {z28.b-z29.b}, z31.b, z27[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.b-z29.b}, z31.b, z29.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za3.s, {z28.b-z29.b}, z31.b, z29.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.h-z29.h}, z31.h, z24[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za3.s, {z28.h-z29.h}, z31.h, z24[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.h-z29.h}, z31.h, z21.h[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za3.s, {z28.h-z29.h}, z31.h, z21.h[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.s-z29.s}, z31.s, z19[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za3.s, {z28.s-z29.s}, z31.s, z19[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.s-z29.s}, z31.s, z30.s[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted vector register, expected register in z20..z23 or z28..z31
+// CHECK-NEXT: ftmopa za3.s, {z28.s-z29.s}, z31.s, z30.s[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate
+
+ftmopa za0.h, {z28.b-z29.b}, z31.b, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: ftmopa za0.h, {z28.b-z29.b}, z31.b, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: ftmopa za0.h, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.b-z29.b}, z31.b, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: ftmopa za3.s, {z28.b-z29.b}, z31.b, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: ftmopa za3.s, {z28.h-z29.h}, z31.h, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za3.s, {z28.s-z29.s}, z31.s, z20[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: ftmopa za3.s, {z28.s-z29.s}, z31.s, z20[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid ZPR type suffix
+
+ftmopa za0.h, {z28.b-z29.b}, z31.h, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ftmopa za0.h, {z28.b-z29.b}, z31.h, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.h, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ftmopa za0.h, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.s, {z28.b-z29.b}, z31.h, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ftmopa za0.s, {z28.b-z29.b}, z31.h, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.s, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ftmopa za0.s, {z28.h-z29.h}, z31.s, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ftmopa za0.s, {z28.s-z29.s}, z31.h, z20[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ftmopa za0.s, {z28.s-z29.s}, z31.h, z20[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
diff --git a/llvm/test/MC/AArch64/SME2p2/ftmopa.s b/llvm/test/MC/AArch64/SME2p2/ftmopa.s
new file mode 100644
index 00000000000000..6944b94e6a812c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/ftmopa.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 < %s \
+// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 < %s \
+// RUN: | llvm-objdump -d --mattr=-sme2p2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32,+sme-f8f16,+sme-f16f16 -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// 2-way (fp8-to-fp16)
+
+ftmopa za0.h, {z0.b-z1.b}, z0.b, z20[0] // 10000000-01100000-00000000-00001000
+// CHECK-INST: ftmopa za0.h, { z0.b, z1.b }, z0.b, z20[0]
+// CHECK-ENCODING: [0x08,0x00,0x60,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f16
+// CHECK-UNKNOWN: 80600008 <unknown>
+
+ftmopa za1.h, {z10.b-z11.b}, z21.b, z29[1] // 10000000-01110101-00010101-01011001
+// CHECK-INST: ftmopa za1.h, { z10.b, z11.b }, z21.b, z29[1]
+// CHECK-ENCODING: [0x59,0x15,0x75,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f16
+// CHECK-UNKNOWN: 80751559 <unknown>
+
+ftmopa za1.h, {z30.b-z31.b}, z31.b, z31[3] // 10000000-01111111-00011111-11111001
+// CHECK-INST: ftmopa za1.h, { z30.b, z31.b }, z31.b, z31[3]
+// CHECK-ENCODING: [0xf9,0x1f,0x7f,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f16
+// CHECK-UNKNOWN: 807f1ff9 <unknown>
+
+// 2-way (fp16-to-fp32)
+
+ftmopa za0.s, {z0.h-z1.h}, z0.h, z20[0] // 10000001-01100000-00000000-00000000
+// CHECK-INST: ftmopa za0.s, { z0.h, z1.h }, z0.h, z20[0]
+// CHECK-ENCODING: [0x00,0x00,0x60,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81600000 <unknown>
+
+ftmopa za3.s, {z12.h-z13.h}, z8.h, z23[3] // 10000001-01101000-00001101-10110011
+// CHECK-INST: ftmopa za3.s, { z12.h, z13.h }, z8.h, z23[3]
+// CHECK-ENCODING: [0xb3,0x0d,0x68,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81680db3 <unknown>
+
+ftmopa za3.s, {z30.h-z31.h}, z31.h, z31[3] // 10000001-01111111-00011111-11110011
+// CHECK-INST: ftmopa za3.s, { z30.h, z31.h }, z31.h, z31[3]
+// CHECK-ENCODING: [0xf3,0x1f,0x7f,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 817f1ff3 <unknown>
+
+// 4-way
+
+ftmopa za0.s, {z0.b-z1.b}, z0.b, z20[0] // 10000000-01100000-00000000-00000000
+// CHECK-INST: ftmopa za0.s, { z0.b, z1.b }, z0.b, z20[0]
+// CHECK-ENCODING: [0x00,0x00,0x60,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80600000 <unknown>
+
+ftmopa za3.s, {z12.b-z13.b}, z8.b, z23[3] // 10000000-01101000-00001101-10110011
+// CHECK-INST: ftmopa za3.s, { z12.b, z13.b }, z8.b, z23[3]
+// CHECK-ENCODING: [0xb3,0x0d,0x68,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80680db3 <unknown>
+
+ftmopa za3.s, {z30.b-z31.b}, z31.b, z31[3] // 10000000-01111111-00011111-11110011
+// CHECK-INST: ftmopa za3.s, { z30.b, z31.b }, z31.b, z31[3]
+// CHECK-ENCODING: [0xf3,0x1f,0x7f,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 807f1ff3 <unknown>
+
+// non-widening (half-precision)
+
+ftmopa za0.h, {z0.h-z1.h}, z0.h, z20[0] // 10000001-01000000-00000000-00001000
+// CHECK-INST: ftmopa za0.h, { z0.h, z1.h }, z0.h, z20[0]
+// CHECK-ENCODING: [0x08,0x00,0x40,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81400008 <unknown>
+
+ftmopa za1.h, {z12.h-z13.h}, z8.h, z23[3] // 10000001-01001000-00001101-10111001
+// CHECK-INST: ftmopa za1.h, { z12.h, z13.h }, z8.h, z23[3]
+// CHECK-ENCODING: [0xb9,0x0d,0x48,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81480db9 <unknown>
+
+ftmopa za1.h, {z30.h-z31.h}, z31.h, z31[3] // 10000001-01011111-00011111-11111001
+// CHECK-INST: ftmopa za1.h, { z30.h, z31.h }, z31.h, z31[3]
+// CHECK-ENCODING: [0xf9,0x1f,0x5f,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 815f1ff9 <unknown>
+
+// non-widening (single-precision)
+
+ftmopa za0.s, {z0.s-z1.s}, z0.s, z20[0] // 10000000-01000000-00000000-00000000
+// CHECK-INST: ftmopa za0.s, { z0.s, z1.s }, z0.s, z20[0]
+// CHECK-ENCODING: [0x00,0x00,0x40,0x80]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 80400000 <unknown>
+
+ftmopa za3.s, {z12.s-z13.s}, z8.s, z23[3] // 10000000-01001000-00001101-10110011
+// CHECK-INST: ftmopa za3.s, { z12.s, z13.s }, z8.s, z23[3]
+// CHECK-ENCODING: [0xb3,0x0d,0x48,0x80]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 80480db3 <unknown>
+
+ftmopa za3.s, {z30.s-z31.s}, z31.s, z31[3] // 10000000-01011111-00011111-11110011
+// CHECK-INST: ftmopa za3.s, { z30.s, z31.s }, z31.s, z31[3]
+// CHECK-ENCODING: [0xf3,0x1f,0x5f,0x80]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 805f1ff3 <unknown>
\ No newline at end of file
>From ae825a364bbac4902518951fcfefde78333aa944 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 22 Oct 2024 14:09:26 +0000
Subject: [PATCH 2/2] Use ZPR_K register class in Decoder method
---
.../Target/AArch64/Disassembler/AArch64Disassembler.cpp | 7 ++-----
.../Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 8 ++++++--
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index f83bf57650062c..52f91662d21886 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -394,14 +394,11 @@ static DecodeStatus DecodeZPR2Mul2RegisterClass(MCInst &Inst, unsigned RegNo,
// 111
static DecodeStatus DecodeZK(MCInst &Inst, unsigned RegNo, uint64_t Address,
const MCDisassembler *Decoder) {
- // RegNo < 4 => Reg is in Z20-Z23 (offset 20)
- // RegNo >= 4 => Reg is in Z28-Z31 (offset 24)
- unsigned Reg = (RegNo < 4) ? (RegNo + 20) : (RegNo + 24);
- if (!(Reg >= 20 && Reg <= 23) && !(Reg >= 28 && Reg <= 31))
+ if (RegNo > 7)
return Fail;
unsigned Register =
- AArch64MCRegisterClasses[AArch64::ZPRRegClassID].getRegister(Reg);
+ AArch64MCRegisterClasses[AArch64::ZPR_KRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 85ffb8639dadf5..a019ff8eac3328 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -584,10 +584,14 @@ uint32_t AArch64MCCodeEmitter::EncodeZK(const MCInst &MI, unsigned OpIdx,
const MCSubtargetInfo &STI) const {
auto RegOpnd = MI.getOperand(OpIdx).getReg();
unsigned RegVal = Ctx.getRegisterInfo()->getEncodingValue(RegOpnd);
- // Z28 => RegVal = 28 (28 - 24 = 4) Z28 = 4
+
+ // Z28-Z31 => Reg is in 4..7 (offset 24)
if (RegOpnd > AArch64::Z27)
return (RegVal - 24);
- // Z20 => RegVal = 20 (20 -20 = 0) Z20 = 0
+
+ assert((RegOpnd > AArch64::Z19 && RegOpnd < AArch64::Z24) &&
+ "Expected ZK in Z20..Z23 or Z28..Z31");
+ // Z20-Z23 => Reg is in 0..3 (offset 20)
return (RegVal - 20);
}
More information about the llvm-commits
mailing list