[llvm] [AArch64] Add assembly/disassembly for FMOP4A (widening, 4-way) instructions (PR #113347)

Tue Oct 22 10:38:37 PDT 2024

https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/113347

None

>From 217c309e4ed25a30d7f5137abfd7b7043d784855 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 22 Oct 2024 17:01:47 +0100
Subject: [PATCH] [AArch64] Add assembly/disassembly for FMOP4A (widening,
 4-way) instructions

---
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |   5 +
 llvm/lib/Target/AArch64/SMEInstrFormats.td    |  35 +++++
 .../fmop4a-fp8-fp32-widening-diagnostics.s    | 120 ++++++++++++++++++
 .../AArch64/SME2p2/fmop4a-fp8-fp32-widening.s |  93 ++++++++++++++
 4 files changed, 253 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s

diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 802797a14ee42d..61cdf190c99f1f 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1000,3 +1000,8 @@ defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_fr
 
 } //[HasSMEF8F32]
 
+let Uses = [FPMR, FPCR] in {
+let Predicates = [HasSME2p2, HasSMEF8F32] in {
+  defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">;
+}
+}
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 38d256c8234118..85baaec5e91896 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5126,3 +5126,38 @@ class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
   let Inst{3-2}   = 0b00;
   let Inst{1-0}   = Zd{1-0};
 }
+
+class sme2_fp8_fp32_quarter_tile_outer_product<bit M, bit N, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
+    : I<(outs TileOp32:$ZAda),
+        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
+        mnemonic, "\t$ZAda, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<2> ZAda;
+  bits<3> Zn;
+  bits<3> Zm;
+
+  let Inst{31-21} = 0b10000000001;
+  let Inst{20} = M;
+  let Inst{19-17} = Zm;
+  let Inst{16-10} = 0b0000000;
+  let Inst{9} = N;
+  let Inst{8-6} = Zn;
+  let Inst{5-2} = 0b0000;
+  let Inst{1-0} = ZAda;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic> {
+  // Single vectors
+  def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>;
+
+  // Multiple and single vectors
+  def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>;
+
+  // Single and multiple vectors
+  def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>;
+
+  // Multiple vectors
+  def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
+}
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s
new file mode 100644
index 00000000000000..9a06192c0b30af
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s
@@ -0,0 +1,120 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32 < %s 2>&1 | FileCheck %s
+
+// Single vectors
+
+fmop4a za0.d, z0.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, z0.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.d, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z15.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z16.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z0.b, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, z12.b, z17.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, z12.b, z14.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, z12.b, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+// Single and multiple vectors
+
+fmop4a za0.d, z0.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, z0.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.d, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z1.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z16.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z0.b, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.b, {z17.b-z18.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, z0.b, {z16.b-z18.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.b, {z12.b-z13.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+fmop4a za0.d, {z0.b-z1.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, {z0.b-z1.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.s-z1.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+fmop4a za0.s, {z1.b-z2.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z2.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z16.b-z17.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z1.b}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, {z0.b-z1.b}, z17.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, {z0.b-z1.b}, z12.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+// Multiple vectors
+
+fmop4a za0.d, {z0.b-z1.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, {z0.b-z1.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.s-z1.s}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z1.b-z2.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z2.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z18.b-z19.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z1.b}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.b-z1.b}, {z19.b-z20.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z1.b}, {z18.b-z20.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.b-z1.b}, {z10.b-z11.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s
new file mode 100644
index 00000000000000..50f8aa6377f853
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s
@@ -0,0 +1,93 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2,+sme-f8f32 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Single vectors
+
+fmop4a  za0.s, z0.b, z16.b  // 10000000-00100000-00000000-00000000
+// CHECK-INST: fmop4a  za0.s, z0.b, z16.b
+// CHECK-ENCODING: [0x00,0x00,0x20,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80200000 <unknown>
+
+fmop4a  za1.s, z10.b, z20.b  // 10000000-00100100-00000001-01000001
+// CHECK-INST: fmop4a  za1.s, z10.b, z20.b
+// CHECK-ENCODING: [0x41,0x01,0x24,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80240141 <unknown>
+
+fmop4a  za3.s, z14.b, z30.b  // 10000000-00101110-00000001-11000011
+// CHECK-INST: fmop4a  za3.s, z14.b, z30.b
+// CHECK-ENCODING: [0xc3,0x01,0x2e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 802e01c3 <unknown>
+
+// Single and multiple vectors
+
+fmop4a  za0.s, z0.b, {z16.b-z17.b}  // 10000000-00110000-00000000-00000000
+// CHECK-INST: fmop4a  za0.s, z0.b, { z16.b, z17.b }
+// CHECK-ENCODING: [0x00,0x00,0x30,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80300000 <unknown>
+
+fmop4a  za1.s, z10.b, {z20.b-z21.b}  // 10000000-00110100-00000001-01000001
+// CHECK-INST: fmop4a  za1.s, z10.b, { z20.b, z21.b }
+// CHECK-ENCODING: [0x41,0x01,0x34,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80340141 <unknown>
+
+fmop4a  za3.s, z14.b, {z30.b-z31.b}  // 10000000-00111110-00000001-11000011
+// CHECK-INST: fmop4a  za3.s, z14.b, { z30.b, z31.b }
+// CHECK-ENCODING: [0xc3,0x01,0x3e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 803e01c3 <unknown>
+
+// Multiple and single vectors
+
+fmop4a  za0.s, {z0.b-z1.b}, z16.b  // 10000000-00100000-00000010-00000000
+// CHECK-INST: fmop4a  za0.s, { z0.b, z1.b }, z16.b
+// CHECK-ENCODING: [0x00,0x02,0x20,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80200200 <unknown>
+
+fmop4a  za1.s, {z10.b-z11.b}, z20.b  // 10000000-00100100-00000011-01000001
+// CHECK-INST: fmop4a  za1.s, { z10.b, z11.b }, z20.b
+// CHECK-ENCODING: [0x41,0x03,0x24,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80240341 <unknown>
+
+fmop4a  za3.s, {z14.b-z15.b}, z30.b  // 10000000-00101110-00000011-11000011
+// CHECK-INST: fmop4a  za3.s, { z14.b, z15.b }, z30.b
+// CHECK-ENCODING: [0xc3,0x03,0x2e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 802e03c3 <unknown>
+
+// Multiple vectors
+
+fmop4a  za0.s, {z0.b-z1.b}, {z16.b-z17.b}  // 10000000-00110000-00000010-00000000
+// CHECK-INST: fmop4a  za0.s, { z0.b, z1.b }, { z16.b, z17.b }
+// CHECK-ENCODING: [0x00,0x02,0x30,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80300200 <unknown>
+
+fmop4a  za1.s, {z10.b-z11.b}, {z20.b-z21.b}  // 10000000-00110100-00000011-01000001
+// CHECK-INST: fmop4a  za1.s, { z10.b, z11.b }, { z20.b, z21.b }
+// CHECK-ENCODING: [0x41,0x03,0x34,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80340341 <unknown>
+
+fmop4a  za3.s, {z14.b-z15.b}, {z30.b-z31.b}  // 10000000-00111110-00000011-11000011
+// CHECK-INST: fmop4a  za3.s, { z14.b, z15.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xc3,0x03,0x3e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 803e03c3 <unknown>