[llvm] [AArch64] Add assembly/disassembly for FMOP4{A,S} (non-widening) half-precision instructions (PR #113343)

Fri Oct 25 09:24:41 PDT 2024

https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/113343

>From c6512a7cd62806960e67d19c6fe5469e7aedf0d4 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 22 Oct 2024 15:22:52 +0100
Subject: [PATCH 1/2] [AArch64] Add assembly/disassembly for FMOP4{A,S}
 (non-widening) half-precision instructions

---
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |   2 +
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |   3 +
 llvm/lib/Target/AArch64/SMEInstrFormats.td    |  37 +++
 .../fmop4as-fp16-non-widening-diagnostics.s   | 220 ++++++++++++++++++
 .../SME2p2/fmop4as-fp16-non-widening.s        | 179 ++++++++++++++
 5 files changed, 441 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s

diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 6044b5bb7d8151..7517e6d7c1087a 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1020,4 +1020,6 @@ let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in {
 
 let Predicates = [HasSME2p2, HasSMEF16F16] in {
   def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
+  defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a">;
+  defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s">;
 } // [HasSME2p2, HasSMEF16F16]
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index dfc5e04110cf57..be06ede7aeb722 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6261,6 +6261,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
   case Match_InvalidMatrixTileVectorV128:
     return Error(Loc,
                  "invalid matrix operand, expected za[0-15]h.q or za[0-15]v.q");
+  case Match_InvalidMatrixTile16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1].h");
   case Match_InvalidMatrixTile32:
     return Error(Loc, "invalid matrix operand, expected za[0-3].s");
   case Match_InvalidMatrixTile64:
@@ -6881,6 +6883,7 @@ bool AArch64AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidSVEExactFPImmOperandHalfOne:
   case Match_InvalidSVEExactFPImmOperandHalfTwo:
   case Match_InvalidSVEExactFPImmOperandZeroOne:
+  case Match_InvalidMatrixTile16:
   case Match_InvalidMatrixTile32:
   case Match_InvalidMatrixTile64:
   case Match_InvalidMatrix:
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 08929ed5616b2c..0b6f12770375d7 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5188,3 +5188,40 @@ class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
   let Inst{3-2}   = 0b00;
   let Inst{1-0}   = Zd{1-0};
 }
+
+class sme2_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
+    : I<(outs TileOp16:$ZAda),
+        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
+        mnemonic, "\t$ZAda, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bit ZAda;
+  bits<3> Zn;
+  bits<3> Zm;
+
+  let Inst{31-21} = 0b10000001000;
+  let Inst{20} = M;
+  let Inst{19-17} = Zm;
+  let Inst{16-10} = 0b0000000;
+  let Inst{9} = N;
+  let Inst{8-6} = Zn;
+  let Inst{5} = 0;
+  let Inst{4} = S;
+  let Inst{3-1} = 0b100;
+  let Inst{0} = ZAda;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_fmop4as_fp16_non_widening<bit S, string mnemonic> {
+  // Single vectors
+  def _ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
+
+  // Multiple and single vectors
+  def _2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
+
+  // Single and multiple vectors
+  def _Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
+
+  // Multiple vectors
+  def _2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
+}
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s
new file mode 100644
index 00000000000000..0272721e083621
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s
@@ -0,0 +1,220 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f16f16 < %s 2>&1 | FileCheck %s
+
+// FMOP4A
+
+// Single vectors
+
+fmop4a za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+fmop4a za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+fmop4a za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+fmop4a za0.h, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+fmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+
+// FMOP4S
+
+// Single vectors
+
+fmop4s za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+fmop4s za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+fmop4s za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+fmop4s za0.h, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+fmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
new file mode 100644
index 00000000000000..0ed125500d0639
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
@@ -0,0 +1,179 @@
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2,+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f16f16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+// FMOP4A
+
+// Single vectors
+
+fmop4a  za0.h, z0.h, z16.h  // 10000001-00000000-00000000-00001000
+// CHECK-INST: fmop4a  za0.h, z0.h, z16.h
+// CHECK-ENCODING: [0x08,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000008 <unknown>
+
+fmop4a  za1.h, z12.h, z24.h  // 10000001-00001000-00000001-10001001
+// CHECK-INST: fmop4a  za1.h, z12.h, z24.h
+// CHECK-ENCODING: [0x89,0x01,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080189 <unknown>
+
+fmop4a  za1.h, z14.h, z30.h  // 10000001-00001110-00000001-11001001
+// CHECK-INST: fmop4a  za1.h, z14.h, z30.h
+// CHECK-ENCODING: [0xc9,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e01c9 <unknown>
+
+// Single and multiple vectors
+
+fmop4a  za0.h, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00001000
+// CHECK-INST: fmop4a  za0.h, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x08,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100008 <unknown>
+
+fmop4a  za1.h, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10001001
+// CHECK-INST: fmop4a  za1.h, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x89,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180189 <unknown>
+
+fmop4a  za1.h, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11001001
+// CHECK-INST: fmop4a  za1.h, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc9,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e01c9 <unknown>
+
+// Multiple and single vectors
+
+fmop4a  za0.h, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00001000
+// CHECK-INST: fmop4a  za0.h, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x08,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000208 <unknown>
+
+fmop4a  za1.h, {z12.h-z13.h}, z24.h  // 10000001-00001000-00000011-10001001
+// CHECK-INST: fmop4a  za1.h, { z12.h, z13.h }, z24.h
+// CHECK-ENCODING: [0x89,0x03,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080389 <unknown>
+
+fmop4a  za1.h, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11001001
+// CHECK-INST: fmop4a  za1.h, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xc9,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e03c9 <unknown>
+
+// Multiple vectors
+
+fmop4a  za0.h, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00001000
+// CHECK-INST: fmop4a  za0.h, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x08,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100208 <unknown>
+
+fmop4a  za1.h, {z12.h-z13.h}, {z24.h-z25.h}  // 10000001-00011000-00000011-10001001
+// CHECK-INST: fmop4a  za1.h, { z12.h, z13.h }, { z24.h, z25.h }
+// CHECK-ENCODING: [0x89,0x03,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180389 <unknown>
+
+fmop4a  za1.h, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11001001
+// CHECK-INST: fmop4a  za1.h, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc9,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e03c9 <unknown>
+
+// FMOP4S
+
+// Single vectors
+
+fmop4s  za0.h, z0.h, z16.h  // 10000001-00000000-00000000-00011000
+// CHECK-INST: fmop4s  za0.h, z0.h, z16.h
+// CHECK-ENCODING: [0x18,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000018 <unknown>
+
+fmop4s  za1.h, z12.h, z24.h  // 10000001-00001000-00000001-10011001
+// CHECK-INST: fmop4s  za1.h, z12.h, z24.h
+// CHECK-ENCODING: [0x99,0x01,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080199 <unknown>
+
+fmop4s  za1.h, z14.h, z30.h  // 10000001-00001110-00000001-11011001
+// CHECK-INST: fmop4s  za1.h, z14.h, z30.h
+// CHECK-ENCODING: [0xd9,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e01d9 <unknown>
+
+// Single and multiple vectors
+
+fmop4s  za0.h, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00011000
+// CHECK-INST: fmop4s  za0.h, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x18,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100018 <unknown>
+
+fmop4s  za1.h, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10011001
+// CHECK-INST: fmop4s  za1.h, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x99,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180199 <unknown>
+
+fmop4s  za1.h, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11011001
+// CHECK-INST: fmop4s  za1.h, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd9,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e01d9 <unknown>
+
+// Multiple and single vectors
+
+fmop4s  za0.h, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00011000
+// CHECK-INST: fmop4s  za0.h, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x18,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000218 <unknown>
+
+fmop4s  za1.h, {z12.h-z13.h}, z24.h  // 10000001-00001000-00000011-10011001
+// CHECK-INST: fmop4s  za1.h, { z12.h, z13.h }, z24.h
+// CHECK-ENCODING: [0x99,0x03,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080399 <unknown>
+
+fmop4s  za1.h, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11011001
+// CHECK-INST: fmop4s  za1.h, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xd9,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e03d9 <unknown>
+
+// Multiple vectors
+
+fmop4s  za0.h, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00011000
+// CHECK-INST: fmop4s  za0.h, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x18,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100218 <unknown>
+
+fmop4s  za1.h, {z12.h-z13.h}, {z24.h-z25.h}  // 10000001-00011000-00000011-10011001
+// CHECK-INST: fmop4s  za1.h, { z12.h, z13.h }, { z24.h, z25.h }
+// CHECK-ENCODING: [0x99,0x03,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180399 <unknown>
+
+fmop4s  za1.h, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11011001
+// CHECK-INST: fmop4s  za1.h, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd9,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e03d9 <unknown>

>From b3dcfb45ad724f8f46a6874ff807d02a5679eea1 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 25 Oct 2024 17:18:24 +0100
Subject: [PATCH 2/2] [fixup] Tweak internal instruction names and a test

---
 llvm/lib/Target/AArch64/SMEInstrFormats.td              | 8 ++++----
 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 0b6f12770375d7..9c6cc7ffb2b4b8 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5214,14 +5214,14 @@ class sme2_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic,
 
 multiclass sme2_fmop4as_fp16_non_widening<bit S, string mnemonic> {
   // Single vectors
-  def _ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
+  def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
 
   // Multiple and single vectors
-  def _2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
+  def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
 
   // Single and multiple vectors
-  def _Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
+  def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
 
   // Multiple vectors
-  def _2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
+  def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
 }
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
index 0ed125500d0639..2a94acd35e95c3 100644
--- a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
@@ -6,7 +6,7 @@
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \
 // RUN:        | llvm-objdump -d --mattr=+sme2p2,+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \
-// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN:        | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 // Disassemble encoding and check the re-encoding (-show-encoding) matches.
 // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \