[llvm] [AArch64] Add assembly/disassembly for BFMOP4{A, S} (widening) instructions (PR #113203)

Thu Oct 24 09:58:24 PDT 2024

https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/113203

>From 4f6f6540489cb429c7e71bb2797ab0a002d7a7e6 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 21 Oct 2024 18:25:30 +0100
Subject: [PATCH 1/3] [AArch64] Add assembly/disassembly for BFMOP4{A,S}
 (widening) instructions

The new instructions are described in
https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions
---
 llvm/lib/Target/AArch64/AArch64.td            |   9 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |   3 +
 llvm/lib/Target/AArch64/SMEInstrFormats.td    |  37 +++
 .../SME2p2/bfmop4as-widening-diagnostics.s    | 226 ++++++++++++++++++
 .../MC/AArch64/SME2p2/bfmop4as-widening.s     | 178 ++++++++++++++
 5 files changed, 451 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s

diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 2262ad1dfd0cc9..d5269394347c10 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -73,8 +73,13 @@ def SVEUnsupported : AArch64Unsupported {
                       SVE2Unsupported.F);
 }
 
-let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1, HasSVE2p1orSSVE_AES] in
-def SME2p1Unsupported : AArch64Unsupported;
+let F = [HasSME2p2] in
+def SME2p2Unsupported : AArch64Unsupported;
+
+def SME2p1Unsupported : AArch64Unsupported {
+  let F = !listconcat([HasSME2p1, HasSVE2p1_or_HasSME2p1, HasSVE2p1orSSVE_AES],
+                      SME2p2Unsupported.F); // everything SME2p2 marks unsupported is unsupported here too
+}
 
 def SME2Unsupported : AArch64Unsupported {
   let F = !listconcat([HasSME2, HasSVE2orSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 6044b5bb7d8151..b763aa15a7c3f1 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1004,6 +1004,9 @@ let Predicates = [HasSME2p2] in {
   def FTMOPA_M2ZZZI_HtoS  : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">;
   def FTMOPA_M2ZZZI_StoS  : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">;
   def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">;
+
+  defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a">; // S = 0: Inst{4} clear in every operand form
+  defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s">; // S = 1: Inst{4} set in every operand form
 } // [HasSME2p2]
 
 let Predicates = [HasSME2p2, HasSMEB16B16] in {
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 08929ed5616b2c..75fd65ebf62723 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5188,3 +5188,40 @@ class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
   let Inst{3-2}   = 0b00;
   let Inst{1-0}   = Zd{1-0};
 }
+
+class sme2_bf16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
+    : I<(outs TileOp32:$ZAda),
+        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
+        mnemonic, "\t$ZAda, $Zn, $Zm",
+        "", []>, Sched<[]> { // Common encoding for all operand forms built by sme2_bfmop4as_widening.
+  bits<2> ZAda; // tile operand (TileOp32); tied to the $_ZAda input below
+  bits<3> Zn;   // first source operand, of type zn_ty
+  bits<3> Zm;   // second source operand, of type zm_ty
+
+  let Inst{31-21} = 0b10000001000; // fixed opcode bits
+  let Inst{20} = M;                // the multiclass passes 1 when zm_ty is a vector-pair operand
+  let Inst{19-17} = Zm;
+  let Inst{16-10} = 0b0000000;
+  let Inst{9} = N;                 // the multiclass passes 1 when zn_ty is a vector-pair operand
+  let Inst{8-6} = Zn;
+  let Inst{5} = 0;
+  let Inst{4} = S;                 // 0 for the "bfmop4a" defm, 1 for the "bfmop4s" defm
+  let Inst{3-2} = 0b00;
+  let Inst{1-0} = ZAda;
+
+  let Constraints = "$ZAda = $_ZAda"; // the tile is both an input and the output of the instruction
+}
+
+multiclass sme2_bfmop4as_widening<bit S, string mnemonic> {
+  // Single vectors
+  def _ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
+
+  // Multiple and single vectors
+  def _2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
+
+  // Single and multiple vectors
+  def _Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
+
+  // Multiple vectors
+  def _2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
+}
diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
new file mode 100644
index 00000000000000..7bf425db67f8f5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
@@ -0,0 +1,226 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s
+
+// BFMOP4A
+
+// Single vectors
+
+bfmop4a za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+bfmop4a za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+bfmop4a za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+bfmop4a za0.s, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+bfmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+
+
+
+
+
+
+
+// BFMOP4S
+
+// Single vectors
+
+bfmop4s za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+bfmop4s za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+bfmop4s za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+bfmop4s za0.s, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+bfmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
new file mode 100644
index 00000000000000..2ebd978af76f63
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
@@ -0,0 +1,178 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// BFMOP4A
+
+// Single vectors
+
+bfmop4a za0.s, z0.h, z16.h  // 10000001-00000000-00000000-00000000
+// CHECK-INST: bfmop4a za0.s, z0.h, z16.h
+// CHECK-ENCODING: [0x00,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000000 <unknown>
+
+bfmop4a za3.s, z14.h, z30.h  // 10000001-00001110-00000001-11000011
+// CHECK-INST: bfmop4a za3.s, z14.h, z30.h
+// CHECK-ENCODING: [0xc3,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e01c3 <unknown>
+
+bfmop4a za1.s, z10.h, z20.h  // 10000001-00000100-00000001-01000001
+// CHECK-INST: bfmop4a za1.s, z10.h, z20.h
+// CHECK-ENCODING: [0x41,0x01,0x04,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81040141 <unknown>
+
+// Single and multiple vectors
+
+bfmop4a za0.s, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00000000
+// CHECK-INST: bfmop4a za0.s, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x00,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100000 <unknown>
+
+bfmop4a za3.s, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11000011
+// CHECK-INST: bfmop4a za3.s, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc3,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e01c3 <unknown>
+
+bfmop4a za2.s, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10000010
+// CHECK-INST: bfmop4a za2.s, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x82,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81180182 <unknown>
+
+// Multiple and single vectors
+
+bfmop4a za0.s, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00000000
+// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x00,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000200 <unknown>
+
+bfmop4a za3.s, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11000011
+// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xc3,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e03c3 <unknown>
+
+bfmop4a za2.s, {z12.h-z13.h}, z28.h  // 10000001-00001100-00000011-10000010
+// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, z28.h
+// CHECK-ENCODING: [0x82,0x03,0x0c,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810c0382 <unknown>
+
+// Multiple vectors
+
+bfmop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00000000
+// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x00,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100200 <unknown>
+
+bfmop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11000011
+// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc3,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e03c3 <unknown>
+
+bfmop4a za2.s, {z12.h-z13.h}, {z26.h-z27.h}  // 10000001-00011010-00000011-10000010
+// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, { z26.h, z27.h }
+// CHECK-ENCODING: [0x82,0x03,0x1a,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811a0382 <unknown>
+
+
+// BFMOP4S
+
+// Single vectors
+
+bfmop4s za0.s, z0.h, z16.h  // 10000001-00000000-00000000-00010000
+// CHECK-INST: bfmop4s za0.s, z0.h, z16.h
+// CHECK-ENCODING: [0x10,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000010 <unknown>
+
+bfmop4s za3.s, z14.h, z30.h  // 10000001-00001110-00000001-11010011
+// CHECK-INST: bfmop4s za3.s, z14.h, z30.h
+// CHECK-ENCODING: [0xd3,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e01d3 <unknown>
+
+bfmop4s za1.s, z10.h, z20.h  // 10000001-00000100-00000001-01010001
+// CHECK-INST: bfmop4s za1.s, z10.h, z20.h
+// CHECK-ENCODING: [0x51,0x01,0x04,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81040151 <unknown>
+
+// Single and multiple vectors
+
+bfmop4s za0.s, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00010000
+// CHECK-INST: bfmop4s za0.s, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x10,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100010 <unknown>
+
+bfmop4s za3.s, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11010011
+// CHECK-INST: bfmop4s za3.s, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd3,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e01d3 <unknown>
+
+bfmop4s za2.s, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10010010
+// CHECK-INST: bfmop4s za2.s, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x92,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81180192 <unknown>
+
+// Multiple and single vectors
+
+bfmop4s za0.s, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00010000
+// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x10,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000210 <unknown>
+
+bfmop4s za3.s, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11010011
+// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xd3,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e03d3 <unknown>
+
+bfmop4s za2.s, {z12.h-z13.h}, z28.h  // 10000001-00001100-00000011-10010010
+// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, z28.h
+// CHECK-ENCODING: [0x92,0x03,0x0c,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810c0392 <unknown>
+
+// Multiple vectors
+
+bfmop4s za0.s, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00010000
+// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x10,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100210 <unknown>
+
+bfmop4s za3.s, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11010011
+// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd3,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e03d3 <unknown>
+
+bfmop4s za2.s, {z12.h-z13.h}, {z26.h-z27.h}  // 10000001-00011010-00000011-10010010
+// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, { z26.h, z27.h }
+// CHECK-ENCODING: [0x92,0x03,0x1a,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811a0392 <unknown>

>From 1a03d82ac2c127c4c6139079fca027f28775accc Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 22 Oct 2024 12:15:06 +0100
Subject: [PATCH 2/3] [fixup] Minor test changes (NFC)

---
 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s | 6 ------
 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s             | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
index 7bf425db67f8f5..5906bcb07f15d5 100644
--- a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
+++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
@@ -110,12 +110,6 @@ bfmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h}
 // CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
 
 
-
-
-
-
-
-
 // BFMOP4S
 
 // Single vectors
diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
index 2ebd978af76f63..40d08e503c8bb3 100644
--- a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
+++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
@@ -5,7 +5,7 @@
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
 // RUN:        | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
-// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN:        | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 // Disassemble encoding and check the re-encoding (-show-encoding) matches.
 // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \

>From 5d8f379ba82920026bc666322915dd5d9ec7bb88 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 24 Oct 2024 17:55:36 +0100
Subject: [PATCH 3/3] [fixup] Bring internal instruction names into compliance
 with the naming convention

---
 llvm/lib/Target/AArch64/SMEInstrFormats.td | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 75fd65ebf62723..4cfe18eddf481c 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5214,14 +5214,14 @@ class sme2_bf16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnem
 
 multiclass sme2_bfmop4as_widening<bit S, string mnemonic> {
   // Single vectors
-  def _ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
+  def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
 
   // Multiple and single vectors
-  def _2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
+  def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
 
   // Single and multiple vectors
-  def _Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
+  def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
 
   // Multiple vectors
-  def _2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
+  def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
 }