[llvm] [AArch64] Neoverse V1 scheduling info (PR #126707)
Julien Villette via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 11 02:10:43 PST 2025
https://github.com/jvillette38 created https://github.com/llvm/llvm-project/pull/126707
This PR fixes the scheduling model for the Neoverse V1. All information is taken from the Neoverse V1 Software Optimisation Guide:
https://developer.arm.com/documentation/pjdoc466751330-9685/6-0
Changes:
- micro-operations are reduced to a maximum of 3 and respect the maximum number of issues.
- use ReleaseAtCycles to specify throughput
- fix bypass latencies
- fix some latencies/throughput
The model also considers conflicts between SVE and ASIMD instructions.
From the Software Optimization Guide:
Maximum issue bandwidth is sustained using one of the following combinations:
• 2 SVE Uops.
• 4 ASIMD Uops.
• 1 SVE Uop on V0 and 2 ASIMD Uops on VX13.
• 1 SVE Uop on V1 and 2 ASIMD Uops on V02.
This PR depends on #126703 because of the new test: llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s.
Comparing this test with its version in #126703 shows all scheduling information changes made by this patch.
@Rin18 may be interested.
From 1a5ef7503d13d1e6aa553ff6911887bfc9929f39 Mon Sep 17 00:00:00 2001
From: Julien Villette <julien.villette at sipearl.com>
Date: Mon, 29 Jan 2024 11:14:14 +0100
Subject: [PATCH 1/2] [AArch64] Neoverse V1 scheduling model
This fixes the scheduling model for the Neoverse V1. All information is
taken from the Neoverse V1 Software Optimisation Guide:
https://developer.arm.com/documentation/pjdoc466751330-9685/6-0
Changes:
- micro-operations are reduced to a maximum of 3
- use ReleaseAtCycles to specify throughput
- fix bypass latencies
- fix some latencies/throughput
---
.../Target/AArch64/AArch64SchedNeoverseV1.td | 1187 +--
.../AArch64/AArch64SchedPredNeoverse.td | 43 +
.../Target/AArch64/AArch64SchedPredicates.td | 34 +-
.../AArch64/Neoverse/V1-basic-instructions.s | 776 +-
.../llvm-mca/AArch64/Neoverse/V1-forwarding.s | 46 +-
.../AArch64/Neoverse/V1-neon-instructions.s | 810 +-
.../AArch64/Neoverse/V1-scheduling-info.s | 7588 +++++++++++++++++
.../AArch64/Neoverse/V1-sve-instructions.s | 2068 ++---
.../llvm-mca/AArch64/Neoverse/V1-writeback.s | 1798 ++--
.../AArch64/Neoverse/V1-zero-dependency.s | 6 +-
10 files changed, 11082 insertions(+), 3274 deletions(-)
create mode 100644 llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 368665467859f5f..9f2f11bafe79fe9 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -98,377 +98,452 @@ def V1Write_0c_0Z : SchedWriteRes<[]>;
def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; }
def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; }
-def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1; }
+def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1;
+ let NumMicroOps = 2; }
def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; }
+def V1Write_4c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4;
+ let ReleaseAtCycles = [3]; }
+def V1Write_5c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 5;
+ let ReleaseAtCycles = [3]; }
+
def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; }
+def V1Write_6c2_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6;
+ let ReleaseAtCycles = [2]; }
+def V1Write_6c3_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_7c4_1L : SchedWriteRes<[V1UnitL]> { let Latency = 7;
+ let ReleaseAtCycles = [4]; }
def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
-def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2; }
+def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; }
-def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; }
+def V1Write_4c6_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4;
+ let ReleaseAtCycles = [6]; }
def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; }
def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
+def V1Write_2c2_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2;
+ let ReleaseAtCycles = [2]; }
+def V1Write_3c2_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3;
+ let ReleaseAtCycles = [2]; }
def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; }
def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; }
-def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
- let ReleaseAtCycles = [5]; }
-def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
- let ReleaseAtCycles = [5]; }
+def V1Write_12c12_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; }
+def V1Write_20c20_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; }
+def V1Write_2c4_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2;
+ let ReleaseAtCycles = [4]; }
def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
+def V1Write_4c2_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
+def V1Write_6c3_1V : SchedWriteRes<[V1UnitV]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_12c2_1V : SchedWriteRes<[V1UnitV1]> { let Latency = 12;
+ let ReleaseAtCycles = [2]; }
+def V1Write_14c2_1V : SchedWriteRes<[V1UnitV1]> { let Latency = 14;
+ let ReleaseAtCycles = [2]; }
+
def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; }
def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; }
def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
+def V1Write_5c2_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 5;
+ let ReleaseAtCycles = [2]; }
def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; }
-def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
- let ReleaseAtCycles = [7]; }
-def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
- let ReleaseAtCycles = [7]; }
-def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
- let ReleaseAtCycles = [10]; }
-def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
- let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
- let ReleaseAtCycles = [7]; }
-def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
- let ReleaseAtCycles = [7]; }
+def V1Write_6c4_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6;
+ let ReleaseAtCycles = [4]; }
+def V1Write_10c9_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
+ let ReleaseAtCycles = [9]; }
+def V1Write_11c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 11;
+ let ReleaseAtCycles = [10]; }
+def V1Write_12c11_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
+ let ReleaseAtCycles = [11]; }
+def V1Write_13c12_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
+ let ReleaseAtCycles = [12]; }
+def V1Write_15c14_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
+ let ReleaseAtCycles = [14]; }
+def V1Write_16c14_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
+ let ReleaseAtCycles = [14]; }
+def V1Write_19c18_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 19;
+ let ReleaseAtCycles = [18]; }
+def V1Write_20c20_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
+
def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
-def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Write_4c2_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+def V1Write_4c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
+ let ReleaseAtCycles = [3]; }
+def V1Write_6c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+ let ReleaseAtCycles = [3]; }
+def V1Write_6c5_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
+ let ReleaseAtCycles = [5]; }
+def V1Write_8c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 8;
+ let ReleaseAtCycles = [3]; }
+def V1Write_9c4_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 9;
+ let ReleaseAtCycles = [4]; }
+def V1Write_12c4_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 12;
+ let ReleaseAtCycles = [4]; }
+def V1Write_13c6_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 13;
+ let ReleaseAtCycles = [6]; }
+def V1Write_11c5_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 11;
+ let ReleaseAtCycles = [5]; }
def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
+def V1Write_4c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+def V1Write_6c4_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 6;
+ let ReleaseAtCycles = [4]; }
+def V1Write_7c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
+ let ReleaseAtCycles = [2]; }
def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
let ReleaseAtCycles = [7]; }
-def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
- let ReleaseAtCycles = [7]; }
-def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+def V1Write_9c3_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 9;
+ let ReleaseAtCycles = [2]; }
+def V1Write_10c3_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ReleaseAtCycles = [3]; }
+def V1Write_10c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
let ReleaseAtCycles = [5]; }
-def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
- let ReleaseAtCycles = [11]; }
+def V1Write_10c9_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ReleaseAtCycles = [9]; }
+def V1Write_13c13_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+ let ReleaseAtCycles = [13]; }
def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
let ReleaseAtCycles = [7]; }
-def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
- let ReleaseAtCycles = [7]; }
+def V1Write_15c14_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
+ let ReleaseAtCycles = [14]; }
+def V1Write_16c8_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+ let ReleaseAtCycles = [8]; }
+def V1Write_16c15_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+ let ReleaseAtCycles = [15]; }
def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; }
def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; }
def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; }
+def V1Write_8c2_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 8;
+ let ReleaseAtCycles = [2]; }
+def V1Write_10c2_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 10;
+ let ReleaseAtCycles = [2]; }
def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
+def V1Write_4c2_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4;
+ let ReleaseAtCycles = [2]; }
+
//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types
-let Latency = 1, NumMicroOps = 2 in
-def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>;
-let Latency = 3, NumMicroOps = 2 in
-def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>;
-let Latency = 8, NumMicroOps = 2 in
-def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 11, NumMicroOps = 2 in
-def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
-let Latency = 1, NumMicroOps = 2 in
-def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
-let Latency = 3, NumMicroOps = 2 in
-def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
-let Latency = 7, NumMicroOps = 2 in
-def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 9, NumMicroOps = 2 in
-def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>;
-let Latency = 8, NumMicroOps = 2 in
-def V1Write_8c_1V_1V01 : SchedWriteRes<[V1UnitV, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
-let Latency = 5, NumMicroOps = 2 in
-def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
-let Latency = 2, NumMicroOps = 2 in
-def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
-let Latency = 6, NumMicroOps = 2 in
-def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>;
-let Latency = 4, NumMicroOps = 2 in
-def V1Write_4c_2V13 : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
+def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 3 micro-op types
+def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
-let Latency = 7, NumMicroOps = 3 in
-def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 3 in
-def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
-let Latency = 4, NumMicroOps = 3 in
-def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
-let Latency = 2, NumMicroOps = 3 in
-def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 4, NumMicroOps = 3 in
-def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 3 in
-def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 3 in
-def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 4 micro-op types
-
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0,
- V1UnitV0, V1UnitV0]>;
-let Latency = 7, NumMicroOps = 4 in
-def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV]>;
-let Latency = 9, NumMicroOps = 4 in
-def V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV]>;
-let Latency = 11, NumMicroOps = 4 in
-def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV]>;
-let Latency = 10, NumMicroOps = 4 in
-def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV, V1UnitV]>;
-let Latency = 2, NumMicroOps = 4 in
-def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 4 in
-def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 8, NumMicroOps = 4 in
-def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 9, NumMicroOps = 4 in
-def V1Write_9c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 10, NumMicroOps = 4 in
-def V1Write_10c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 10, NumMicroOps = 4 in
-def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
- V1UnitV1, V1UnitV1]>;
-let Latency = 12, NumMicroOps = 4 in
-def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
- V1UnitV1, V1UnitV1]>;
-let Latency = 6, NumMicroOps = 4 in
-def V1Write_6c_4V0 : SchedWriteRes<[V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0]>;
-let Latency = 12, NumMicroOps = 4 in
-def V1Write_12c_4V01 : SchedWriteRes<[V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 4 in
-def V1Write_6c_4V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
+def V1Write_5c3_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [1,3];
+}
-//===----------------------------------------------------------------------===//
-// Define generic 5 micro-op types
-
-let Latency = 8, NumMicroOps = 5 in
-def V1Write_8c_2L_3V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 14, NumMicroOps = 5 in
-def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
- V1UnitV0,
- V1UnitV1, V1UnitV1,
- V1UnitV13]>;
-let Latency = 9, NumMicroOps = 5 in
-def V1Write_9c_1V_4V01 : SchedWriteRes<[V1UnitV,
- V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 5 in
-def V1Write_6c_5V01 : SchedWriteRes<[V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 6 micro-op types
-
-let Latency = 6, NumMicroOps = 6 in
-def V1Write_6c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
- V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 8, NumMicroOps = 6 in
-def V1Write_8c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
- V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 2, NumMicroOps = 6 in
-def V1Write_2c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 5, NumMicroOps = 6 in
-def V1Write_5c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 6, NumMicroOps = 6 in
-def V1Write_6c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 11, NumMicroOps = 6 in
-def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 11, NumMicroOps = 6 in
-def V1Write_11c_1V_5V01 : SchedWriteRes<[V1UnitV,
- V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
-let Latency = 13, NumMicroOps = 6 in
-def V1Write_13c_6V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 7 micro-op types
+def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
-let Latency = 8, NumMicroOps = 7 in
-def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
- V1UnitV, V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 8, NumMicroOps = 7 in
-def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitS,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_9c2_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
-//===----------------------------------------------------------------------===//
-// Define generic 8 micro-op types
-
-let Latency = 9, NumMicroOps = 8 in
-def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL,
- V1UnitL, V1UnitL,
- V1UnitV, V1UnitV,
- V1UnitV, V1UnitV]>;
-let Latency = 2, NumMicroOps = 8 in
-def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 4, NumMicroOps = 8 in
-def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 12, NumMicroOps = 8 in
-def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01]>;
+def V1Write_9c6_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [6, 6];
+}
-//===----------------------------------------------------------------------===//
-// Define generic 10 micro-op types
-
-let Latency = 13, NumMicroOps = 10 in
-def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01,
- V1UnitS, V1UnitS,
- V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01]>;
-let Latency = 7, NumMicroOps = 10 in
-def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV, V1UnitV,
- V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 11, NumMicroOps = 10 in
-def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0]>;
+def V1Write_11c12_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [12, 12];
+}
-//===----------------------------------------------------------------------===//
-// Define generic 12 micro-op types
+def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
-let Latency = 7, NumMicroOps = 12 in
-def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV01, V1UnitV01, V1UnitV01,
- V1UnitV01, V1UnitV01, V1UnitV01]>;
+def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
-//===----------------------------------------------------------------------===//
-// Define generic 15 micro-op types
+def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
-let Latency = 7, NumMicroOps = 15 in
-def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitS, V1UnitS,
- V1UnitS, V1UnitS, V1UnitS,
- V1UnitV, V1UnitV,
- V1UnitV, V1UnitV, V1UnitV]>;
+def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def V1Write_4c2_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_6c4_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4,4];
+}
+
+def V1Write_7c9_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [9,9];
+}
+
+def V1Write_11c18_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [18,18];
+}
+
+def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V1Write_2c2_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_2c3_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [3,3];
+}
+
+def V1Write_2c4_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4,4];
+}
+
+def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V1Write_4c2_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_4c8_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [8,8];
+}
+
+def V1Write_5c3_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [3,3];
+}
+
+def V1Write_6c6_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [6,6];
+}
+
+def V1Write_7c12_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [12,12];
+}
+
+def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def V1Write_8c2_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V1Write_7c2_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def V1Write_4c2_1V0 : SchedWriteRes<[V1UnitV0]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ReleaseAtCycles = [2];
+}
+
+def V1Write_8c2_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2, 2];
+}
+
+def V1Write_8c2_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_8c3_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [3,3];
+}
+
+def V1Write_10c8_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [8,8];
+}
+
+def V1Write_6c2_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
+}
+
+def V1Write_11c6_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [6,6];
+}
+
+def V1Write_12c8_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [8,8];
+}
-//===----------------------------------------------------------------------===//
-// Define generic 18 micro-op types
-
-let Latency = 19, NumMicroOps = 18 in
-def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitV, V1UnitV, V1UnitV,
- V1UnitV, V1UnitV, V1UnitV,
- V1UnitV, V1UnitV, V1UnitV]>;
-let Latency = 19, NumMicroOps = 18 in
-def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0,
- V1UnitV0, V1UnitV0, V1UnitV0]>;
//===----------------------------------------------------------------------===//
-// Define generic 27 micro-op types
-
-let Latency = 11, NumMicroOps = 27 in
-def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitL01, V1UnitL01, V1UnitL01,
- V1UnitS, V1UnitS, V1UnitS,
- V1UnitS, V1UnitS, V1UnitS,
- V1UnitS, V1UnitS, V1UnitS,
- V1UnitV, V1UnitV, V1UnitV,
- V1UnitV, V1UnitV, V1UnitV,
- V1UnitV, V1UnitV, V1UnitV]>;
+// Define generic 3 micro-op types
+
+def V1Write_2c_1I_1L01_1D : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitD]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [2,2,2];
+}
+
+def V1Write_7c9_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [9,9,9];
+}
+
+def V1Write_11c18_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [18,18,18];
+}
+
+def V1Write_10c2_1L01_1V01_1S : SchedWriteRes<[V1UnitL01, V1UnitV01, V1UnitS]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [2,2,2];
+}
+
+def V1Write_13c6_1L01_1S_1V01 : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV01]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [6,6,6];
+}
+
+def V1Write_13c8_1L01_1S_1V01 : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV01]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ReleaseAtCycles = [8,8,8];
+}
//===----------------------------------------------------------------------===//
// Define forwarded types
@@ -528,7 +603,7 @@ def V1Rd_CRC : SchedReadAdvance<1, [V1Wr_CRC]>;
def V1Wr_ZDOTB : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Rd_ZDOTB : SchedReadAdvance<2, [V1Wr_ZDOTB]>;
-def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
+def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; let ReleaseAtCycles = [2]; }
def V1Rd_ZUDOTB : SchedReadAdvance<2, [V1Wr_ZUDOTB]>;
def V1Wr_ZDOTH : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
@@ -537,7 +612,10 @@ def V1Rd_ZDOTH : SchedReadAdvance<3, [V1Wr_ZDOTH]>;
def V1Wr_ZMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Rd_ZMMA : SchedReadAdvance<2, [V1Wr_ZMMA]>;
-let Latency = 5, NumMicroOps = 2 in
+def V1Wr_ZMABHS : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
+def V1Rd_ZMABHS : SchedReadAdvance<2, [V1Wr_ZMABHS]>;
+
+let Latency = 5, NumMicroOps = 1 in
def V1Wr_ZMAD : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
def V1Rd_ZMAD : SchedReadAdvance<3, [V1Wr_ZMAD]>;
@@ -597,13 +675,19 @@ def : SchedAlias<WriteI, V1Write_1c_1I>;
// ALU, basic, flagset
def : InstRW<[V1Write_1c_1I_1Flg],
- (instregex "^(ADD|SUB)S[WX]r[ir]$",
+ (instregex "^(ADD|SUB)S[WX]rr$",
"^(ADC|SBC)S[WX]r$",
"^ANDS[WX]ri$",
"^(AND|BIC)S[WX]rr$")>;
+// ALU, basic, no flagset
// ALU, extend and shift
-def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
+def V1WriteIEReg : SchedWriteVariant<
+ [SchedVar<RegExtendAndShiftPred, [V1Write_2c_1M]>,
+ SchedVar<NoSchedPred, [V1Write_1c_1I]>]>;
+def : SchedAlias<WriteIEReg, V1WriteIEReg>;
+def : InstRW<[V1WriteIEReg],
+ (instregex "^(ADD|SUB)[WX]rx$")>;
// Arithmetic, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
@@ -612,19 +696,33 @@ def V1WriteISReg : SchedWriteVariant<
SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
def : SchedAlias<WriteISReg, V1WriteISReg>;
+// ALU, basic, flagset
+// ALU, extend and shift, flagset
+def V1WriteIERegS : SchedWriteVariant<
+ [SchedVar<RegExtendAndShiftPred, [V1Write_2c_1M_1Flg]>,
+ SchedVar<NoSchedPred, [V1Write_1c_1I_1Flg]>]>;
+def : InstRW<[V1WriteIERegS],
+ (instregex "^(ADD|SUB)S([WX]rx|Xrx64)$")>;
+
// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
def V1WriteISRegS : SchedWriteVariant<
[SchedVar<IsCheapLSL, [V1Write_1c_1I_1Flg]>,
SchedVar<NoSchedPred, [V1Write_2c_1M_1Flg]>]>;
def : InstRW<[V1WriteISRegS],
- (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
+ (instregex "^(ADD|SUB)S(([WX]rs)|([WX]ri))$")>;
// Logical, shift, no flagset
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
-// Logical, shift, flagset
-def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
+// AArch64: ALU, basic, flagset
+// AArch64: Logical, shift, flagset
+// AArch32: ALU, basic, unconditional, flagset
+// AArch32: Logical, shift by immed, flagset, unconditional
+def V1WriteLogSRegS : SchedWriteVariant<
+ [SchedVar<hasShiftedOpndPred, [V1Write_2c_1M_1Flg]>,
+ SchedVar<NoSchedPred, [V1Write_1c_1I_1Flg]>]>;
+def : InstRW<[V1WriteLogSRegS], (instregex "^(AND|BIC)S[XW]rs$")>;
// Flag manipulation instructions
def : InstRW<[V1Write_1c_1I_1Flg], (instrs SETF8, SETF16, RMIF, CFINV)>;
@@ -634,11 +732,11 @@ def : InstRW<[V1Write_1c_1I_1Flg], (instrs SETF8, SETF16, RMIF, CFINV)>;
// -----------------------------------------------------------------------------
// Divide
-def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
-def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
+def : SchedAlias<WriteID32, V1Write_12c12_1M0>;
+def : SchedAlias<WriteID64, V1Write_20c20_1M0>;
-def : SchedAlias<WriteIM32, V1Write_2c_1M>;
-def : SchedAlias<WriteIM64, V1Write_2c_1M>;
+def : SchedAlias<WriteIM32, V1Write_2c_1M0>;
+def : SchedAlias<WriteIM64, V1Write_2c_1M0>;
// Multiply
// Multiply accumulate, W-form
@@ -685,7 +783,7 @@ def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
// Bitfield extract, two regs
def V1WriteExtr : SchedWriteVariant<
[SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>,
- SchedVar<NoSchedPred, [V1Write_3c_1I_1M]>]>;
+ SchedVar<NoSchedPred, [V1Write_3c_1M]>]>;
def : SchedAlias<WriteExtr, V1WriteExtr>;
// Bitfield move, basic
@@ -709,18 +807,27 @@ def : SchedAlias<WriteLD, V1Write_4c_1L>;
def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
def : SchedAlias<WriteAdr, V1Write_1c_1I>;
+// Load register, register offset, extend, scale by 2
+// Load register, register offset, extend
+def V1WriteLDRH : SchedWriteVariant<[
+ SchedVar<NeoverseScaledIdxPred, [V1Write_5c_1I_1L]>,
+ SchedVar<NoSchedPred, [V1Write_4c_1L]>]>;
+def : InstRW<[V1WriteLDRH, ReadAdrBase], (instregex "^LDRS?H[HWX]ro[WX]$")>;
+
// Load pair, immed offset
def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
+def : InstRW<[V1Write_4c3_1L, V1Write_0c_0Z], (instrs LDPXi, LDNPXi)>;
def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z],
(instrs LDPWpost, LDPWpre)>;
+def : InstRW<[WriteAdr, V1Write_4c3_1L, V1Write_0c_0Z],
+ (instrs LDPXpost, LDPXpre)>;
// Load pair, signed immed offset, signed words
-def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
+def : InstRW<[V1Write_5c3_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
// Load pair, immed post or pre-index, signed words
-def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
- (instrs LDPSWpost, LDPSWpre)>;
+def : InstRW<[WriteAdr, V1Write_5c3_1L, V1Write_0c_0Z], (instrs LDPSWpost, LDPSWpre)>;
// Store instructions
@@ -729,12 +836,22 @@ def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
// Store register, immed offset
def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;
+// Store register, immed post-index
+// Store register, immed pre-index
+def : InstRW<[V1Write_1c_1L01_1D], (instrs STRXpost, STRXpre)>;
+
// Store register, immed offset, index
def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;
// Store pair, immed offset
def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;
+def V1WriteSTRH : SchedWriteVariant<[
+ SchedVar<NeoverseScaledIdxPred, [V1Write_2c_1I_1L01_1D]>,
+ SchedVar<NoSchedPred, [V1Write_1c_1L01_1D]>]>;
+def : InstRW<[V1WriteSTRH, ReadAdrBase],
+ (instregex "^STRHHro[XW]$")>;
+
// FP data processing instructions
// -----------------------------------------------------------------------------
@@ -750,21 +867,23 @@ def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;
// FP divide
// FP square root
-def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;
+def : SchedAlias<WriteFDiv, V1Write_10c5_1V02>;
// FP divide, H-form
// FP square root, H-form
-def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;
+def : InstRW<[V1Write_7c2_1V02], (instrs FDIVHrr, FSQRTHr)>;
// FP divide, S-form
+def : InstRW<[V1Write_10c3_1V02], (instrs FDIVSrr)>;
+
// FP square root, S-form
-def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;
+def : InstRW<[V1Write_9c3_1V02], (instrs FSQRTSr)>;
// FP divide, D-form
def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
// FP square root, D-form
-def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
+def : InstRW<[V1Write_16c8_1V02], (instrs FSQRTDr)>;
// FP multiply
def : WriteRes<WriteFMul, [V1UnitV]> { let Latency = 3; }
@@ -788,7 +907,7 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;
def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
// FP convert, from vec to gen reg
-def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
+def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
// FP convert, Javascript from vec to gen reg
def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;
@@ -835,13 +954,17 @@ def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend, scale, H/Q-form
-def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
+def V1WriteLDRV : SchedWriteVariant<[
+ SchedVar<NeoverseScaledIdxPred, [V1Write_7c_1I_1L]>,
+ SchedVar<NoSchedPred, [V1Write_6c_1L]>]>;
+
+def : InstRW<[V1WriteLDRV, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
// Load vector pair, immed offset, S/D-form
def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>;
// Load vector pair, immed offset, Q-form
-def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+def : InstRW<[V1Write_6c2_1L, V1Write_0c_0Z], (instrs LDPQi, LDNPQi)>;
// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
@@ -850,7 +973,7 @@ def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z],
// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
-def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi],
+def : InstRW<[WriteAdr, V1Write_6c2_1L, V1Write_0c_0Z],
(instrs LDPQpost, LDPQpre)>;
@@ -862,25 +985,26 @@ def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
// Store vector reg, immed post-index, B/H/S/D/Q-form
// Store vector reg, immed pre-index, B/H/S/D/Q-form
-def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
+def : InstRW<[V1Write_2c_1L01_1V01],
(instregex "^STR[BHSDQ](pre|post)$")>;
// Store vector reg, unsigned immed, B/H/S/D/Q-form
def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
-// Store vector reg, register offset, basic, B/S/D-form
-// Store vector reg, register offset, scale, B/S/D-form
-// Store vector reg, register offset, extend, B/S/D-form
-// Store vector reg, register offset, extend, scale, B/S/D-form
+// Store vector reg, register offset, extend, B/H/S/D-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, H-form
+// Store vector reg, register offset, extend, scale, Q-form
+def V1WriteSTRV : SchedWriteVariant<[
+ SchedVar<NeoverseScaledIdxPred, [V1Write_2c_1I_1L01_1V01]>,
+ SchedVar<NoSchedPred, [V1Write_2c_1L01_1V01]>]>;
+def : InstRW<[V1WriteSTRV, ReadAdrBase],
+ (instregex "^STR[BHQ]ro[XW]$")>;
+
+// Store vector reg, register offset, extend, scale, S/D-form
+// Store vector reg, register offset, scale, S/D-form
def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
- (instregex "^STR[BSD]ro[WX]$")>;
-
-// Store vector reg, register offset, basic, H/Q-form
-// Store vector reg, register offset, scale, H/Q-form
-// Store vector reg, register offset, extend, H/Q-form
-// Store vector reg, register offset, extend, scale, H/Q-form
-def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
- (instregex "^STR[HQ]ro[WX]$")>;
+ (instregex "^STR[DS]ro[XW]$")>;
// Store vector pair, immed offset, S/D/Q-form
def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
@@ -892,7 +1016,7 @@ def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
// Store vector pair, immed post-index, Q-form
// Store vector pair, immed pre-index, Q-form
-def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, V1Write_2c2_1L01_1V01], (instrs STPQpre, STPQpost)>;
// ASIMD integer instructions
@@ -921,12 +1045,12 @@ def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
// ASIMD arith, reduce, 8B/8H
// ASIMD max/min, reduce, 8B/8H
-def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
+def : InstRW<[V1Write_4c_1V13], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
"^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
// ASIMD arith, reduce, 16B
// ASIMD max/min, reduce, 16B
-def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
+def : InstRW<[V1Write_4c2_1V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
"[SU](MAX|MIN)Vv16i8v$")>;
// ASIMD dot product
@@ -956,23 +1080,25 @@ def : InstRW<[V1Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>;
def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
// ASIMD multiply long
-def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
+def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULL[vi]")>;
// ASIMD shift accumulate
-def : InstRW<[V1Wr_VSA, V1Rd_VSA], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
+def : InstRW<[V1Wr_VSA, V1Rd_VSA], (instregex "^[SU]R?SRA(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^[SU]R?SRAd")>;
// ASIMD shift by immed, complex
// ASIMD shift by register, complex
def : InstRW<[V1Write_4c_1V13],
(instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
- "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
- "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
+ "^(SQSHLU?|UQSHL|SQSHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", "^[SU]Q?R?SHRU?N[bhsd]", "^[SU]RSHRd",
"^[SU]Q?RSHLv", "^[SU]QSHLv")>;
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic
// ASIMD shift by register, basic
def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
+ "^[SU]S?H[LR]d$", "^S[LR]Id$",
"^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
@@ -994,7 +1120,7 @@ def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$")>;
def : InstRW<[V1Wr_FCMA, V1Rd_FCMA], (instregex "^FCMLAv")>;
// ASIMD FP multiply
-def : InstRW<[V1Wr_FPM], (instregex "^FMULX?v")>;
+def : InstRW<[V1Wr_FPM], (instregex "^FMULX?(v|16|32|64)")>;
// ASIMD FP multiply accumulate
def : InstRW<[V1Wr_FPMA, V1Rd_FPMA], (instregex "^FML[AS]v")>;
@@ -1003,13 +1129,13 @@ def : InstRW<[V1Wr_FPMA, V1Rd_FPMA], (instregex "^FML[AS]v")>;
def : InstRW<[V1Wr_FPMAL, V1Rd_FPMAL], (instregex "^FML[AS]L2?v")>;
// ASIMD FP convert, long (F16 to F32)
-def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
+def : InstRW<[V1Write_4c2_1V02], (instregex "^FCVTLv[48]i16$")>;
// ASIMD FP convert, long (F32 to F64)
def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;
// ASIMD FP convert, narrow (F32 to F16)
-def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>;
+def : InstRW<[V1Write_4c2_1V02], (instregex "^FCVTNv[48]i16$")>;
// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
@@ -1019,31 +1145,31 @@ def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
def : InstRW<[V1Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
"^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
"^FCVT[AMNPZ][SU]v1i64$",
+ "^FCVT[AMNPZ][SU]v1i32$",
+ "^FCVT[AMNPZ][SU]v1f16$",
"^FCVTZ[SU]d$",
+ "^FCVTZ[SU]s$",
+ "^FCVTZ[SU]h$",
"^[SU]CVTFv2f(32|64)$",
"^[SU]CVTFv2i(32|64)_shift$",
"^[SU]CVTFv1i64$",
- "^[SU]CVTFd$")>;
+ "^[SU]CVTFv1i32$",
+ "^[SU]CVTFv1i16$",
+ "^[SU]CVTFd$",
+ "^[SU]CVTFs$",
+ "^[SU]CVTFh$")>;
// ASIMD FP convert, other, D-form F16 and Q-form F32
-def : InstRW<[V1Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+def : InstRW<[V1Write_4c2_1V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
"^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
- "^FCVT[AMNPZ][SU]v1i32$",
- "^FCVTZ[SU]s$",
"^[SU]CVTFv4f(16|32)$",
- "^[SU]CVTFv4i(16|32)_shift$",
- "^[SU]CVTFv1i32$",
- "^[SU]CVTFs$")>;
+ "^[SU]CVTFv4i(16|32)_shift$")>;
// ASIMD FP convert, other, Q-form F16
-def : InstRW<[V1Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+def : InstRW<[V1Write_6c4_1V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
"^FCVT[AMNPZ][SU]v8i16_shift$",
- "^FCVT[AMNPZ][SU]v1f16$",
- "^FCVTZ[SU]h$",
"^[SU]CVTFv8f16$",
- "^[SU]CVTFv8i16_shift$",
- "^[SU]CVTFv1i16$",
- "^[SU]CVTFh$")>;
+ "^[SU]CVTFv8i16_shift$")>;
// ASIMD FP divide, D-form, F16
// ASIMD FP square root, D-form, F16
@@ -1051,42 +1177,42 @@ def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>;
// ASIMD FP divide, F32
// ASIMD FP square root, F32
-def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32,
- FSQRTv2f32, FSQRTv4f32)>;
+def : InstRW<[V1Write_10c5_1V02], (instrs FDIVv2f32, FSQRTv2f32)>;
+def : InstRW<[V1Write_10c9_1V02], (instrs FDIVv4f32, FSQRTv4f32)>;
// ASIMD FP divide, Q-form, F16
-def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>;
+def : InstRW<[V1Write_13c13_1V02], (instrs FDIVv8f16)>;
// ASIMD FP divide, Q-form, F64
-def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>;
+def : InstRW<[V1Write_15c14_1V02], (instrs FDIVv2f64)>;
// ASIMD FP square root, Q-form, F16
-def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>;
+def : InstRW<[V1Write_13c13_1V02], (instrs FSQRTv8f16)>;
// ASIMD FP square root, Q-form, F64
-def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>;
+def : InstRW<[V1Write_16c15_1V02], (instrs FSQRTv2f64)>;
// ASIMD FP max/min, reduce, F32 and D-form F16
-def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
+def : InstRW<[V1Write_4c2_1V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
// ASIMD FP max/min, reduce, Q-form F16
-def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
+def : InstRW<[V1Write_6c3_1V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
// ASIMD FP round, D-form F16 and Q-form F32
-def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
+def : InstRW<[V1Write_4c2_1V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
// ASIMD FP round, Q-form F16
-def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+def : InstRW<[V1Write_6c4_1V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
// ASIMD BF instructions
// -----------------------------------------------------------------------------
// ASIMD convert, F32 to BF16
-def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
+def : InstRW<[V1Write_4c2_1V02], (instrs BFCVTN, BFCVTN2)>;
// ASIMD dot product
def : InstRW<[V1Wr_BFD, V1Rd_BFD], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
@@ -1113,7 +1239,6 @@ def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
-// ASIMD reverse
// ASIMD table lookup, 1 or 2 table regs
// ASIMD table lookup extension, 1 table reg
// ASIMD transfer, element to gen reg
@@ -1132,20 +1257,21 @@ def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
// ASIMD reciprocal and square root estimate, D-form F32 and F64
def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32,
URSQRTEv2i32,
- FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
- FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;
+ FRECPEv2f32, FRSQRTEv2f32,
+ FRECPEv1f16, FRECPEv1i32, FRECPEv1i64,
+ FRSQRTEv1f16, FRSQRTEv1i32, FRSQRTEv1i64)>;
// ASIMD reciprocal and square root estimate, Q-form U32
// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
-def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32,
+def : InstRW<[V1Write_4c2_1V02], (instrs URECPEv4i32,
URSQRTEv4i32,
- FRECPEv1f16, FRECPEv4f16,
+ FRECPEv4f16,
FRECPEv4f32, FRECPEv2f64,
- FRSQRTEv1f16, FRSQRTEv4f16,
+ FRSQRTEv4f16,
FRSQRTEv4f32, FRSQRTEv2f64)>;
// ASIMD reciprocal and square root estimate, Q-form F16
-def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16,
+def : InstRW<[V1Write_6c4_1V02], (instrs FRECPEv8f16,
FRSQRTEv8f16)>;
// ASIMD reciprocal exponent
@@ -1155,27 +1281,33 @@ def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;
def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
"^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
+// Reverse bytes, 64-bit general-purpose register (REVXr is a scalar integer op, not ASIMD)
+def : InstRW<[V1Write_1c_1I], (instregex "^REVXr$")>;
+
+
// ASIMD table lookup, 1 or 2 table regs
// ASIMD table lookup extension, 1 table reg
-def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$",
+def : InstRW<[V1Write_2c_1V01], (instregex "^TBLv(8|16)i8(One|Two)$",
"^TBXv(8|16)i8One$")>;
// ASIMD table lookup, 3 table regs
// ASIMD table lookup extension, 2 table reg
-def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three,
+def : InstRW<[V1Write_4c2_1V01], (instrs TBLv8i8Three, TBLv16i8Three,
TBXv8i8Two, TBXv16i8Two)>;
// ASIMD table lookup, 4 table regs
-def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
+def : InstRW<[V1Write_4c3_1V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
// ASIMD table lookup extension, 3 table reg
-def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
+def : InstRW<[V1Write_6c3_1V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
// ASIMD table lookup extension, 4 table reg
-def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
+def : InstRW<[V1Write_6c5_1V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
// ASIMD transfer, element to gen reg
-def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
+def : InstRW<[V1Write_2c4_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
+ "^UMOVvi(8|16|32|64)_idx0$",
+ "^SMOVvi(8|16|32)to(32|64)_idx0$",
"^UMOVvi(8|16|32|64)$")>;
// ASIMD transfer, gen reg to element
@@ -1192,27 +1324,27 @@ def : InstRW<[WriteAdr, V1Write_6c_1L],
(instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg
-def : InstRW<[V1Write_6c_2L],
+def : InstRW<[V1Write_6c2_1L],
(instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_6c_2L],
+def : InstRW<[WriteAdr, V1Write_6c2_1L],
(instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg
-def : InstRW<[V1Write_6c_3L],
+def : InstRW<[V1Write_6c3_1L],
(instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_6c_3L],
+def : InstRW<[WriteAdr, V1Write_6c3_1L],
(instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
-def : InstRW<[V1Write_6c_2L],
+def : InstRW<[V1Write_6c2_1L],
(instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[WriteAdr, V1Write_6c_2L],
+def : InstRW<[WriteAdr, V1Write_6c2_1L],
(instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[V1Write_7c_4L],
+def : InstRW<[V1Write_7c4_1L],
(instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_7c_4L],
+def : InstRW<[WriteAdr, V1Write_7c4_1L],
(instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane
@@ -1225,60 +1357,60 @@ def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
"^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form
-def : InstRW<[V1Write_8c_1L_2V],
+def : InstRW<[V1Write_8c_1L_1V],
(instregex "^LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
+def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
(instregex "^LD2Twov(8b|4h|2s)_POST$")>;
// ASIMD load, 2 element, multiple, Q-form
-def : InstRW<[V1Write_8c_2L_2V],
+def : InstRW<[V1Write_8c2_1L_1V],
(instregex "^LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_8c_2L_2V],
+def : InstRW<[WriteAdr, V1Write_8c2_1L_1V],
(instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane
// ASIMD load, 2 element, all lanes
-def : InstRW<[V1Write_8c_1L_2V],
+def : InstRW<[V1Write_8c2_1L_1V],
(instregex "^LD2i(8|16|32|64)$",
"^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
+def : InstRW<[WriteAdr, V1Write_8c2_1L_1V],
(instregex "^LD2i(8|16|32|64)_POST$",
"^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form
// ASIMD load, 3 element, one lane
// ASIMD load, 3 element, all lanes
-def : InstRW<[V1Write_8c_2L_3V],
+def : InstRW<[V1Write_8c3_1L_1V],
(instregex "^LD3Threev(8b|4h|2s)$",
"^LD3i(8|16|32|64)$",
"^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_8c_2L_3V],
+def : InstRW<[WriteAdr, V1Write_8c3_1L_1V],
(instregex "^LD3Threev(8b|4h|2s)_POST$",
"^LD3i(8|16|32|64)_POST$",
"^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 3 element, multiple, Q-form
-def : InstRW<[V1Write_8c_3L_3V],
+def : InstRW<[V1Write_8c3_1L_1V],
(instregex "^LD3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_8c_3L_3V],
+def : InstRW<[WriteAdr, V1Write_8c3_1L_1V],
(instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form
// ASIMD load, 4 element, one lane
// ASIMD load, 4 element, all lanes
-def : InstRW<[V1Write_8c_3L_4V],
+def : InstRW<[V1Write_8c3_1L_1V],
(instregex "^LD4Fourv(8b|4h|2s)$",
"^LD4i(8|16|32|64)$",
"^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_8c_3L_4V],
+def : InstRW<[WriteAdr, V1Write_8c3_1L_1V],
(instregex "^LD4Fourv(8b|4h|2s)_POST$",
"^LD4i(8|16|32|64)_POST$",
"^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
// ASIMD load, 4 element, multiple, Q-form
-def : InstRW<[V1Write_9c_4L_4V],
+def : InstRW<[V1Write_9c6_1L_1V],
(instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_9c_4L_4V],
+def : InstRW<[WriteAdr, V1Write_9c6_1L_1V],
(instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
@@ -1297,25 +1429,25 @@ def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
// ASIMD store, 1 element, multiple, 2 reg, Q-form
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, D-form
-def : InstRW<[V1Write_2c_2L01_2V01],
+def : InstRW<[V1Write_2c2_1L01_1V01],
(instregex "^ST1Twov(16b|8h|4s|2d)$",
"^ST1Threev(8b|4h|2s|1d)$",
"^ST1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01],
+def : InstRW<[WriteAdr, V1Write_2c2_1L01_1V01],
(instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
"^ST1Threev(8b|4h|2s|1d)_POST$",
"^ST1Fourv(8b|4h|2s|1d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[V1Write_2c_3L01_3V01],
+def : InstRW<[V1Write_2c3_1L01_1V01],
(instregex "^ST1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01],
+def : InstRW<[WriteAdr, V1Write_2c3_1L01_1V01],
(instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[V1Write_2c_4L01_4V01],
+def : InstRW<[V1Write_2c4_1L01_1V01],
(instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01],
+def : InstRW<[WriteAdr, V1Write_2c4_1L01_1V01],
(instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane
@@ -1334,45 +1466,45 @@ def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01],
// ASIMD store, 3 element, multiple, D-form
// ASIMD store, 3 element, one lane
// ASIMD store, 4 element, one lane, D
-def : InstRW<[V1Write_4c_2L01_2V01],
+def : InstRW<[V1Write_4c2_1L01_1V01],
(instregex "^ST2Twov(16b|8h|4s|2d)$",
"^ST3Threev(8b|4h|2s)$",
"^ST3i(8|16|32|64)$",
"^ST4i64$")>;
-def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01],
+def : InstRW<[WriteAdr, V1Write_4c2_1L01_1V01],
(instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
"^ST3Threev(8b|4h|2s)_POST$",
"^ST3i(8|16|32|64)_POST$",
"^ST4i64_POST$")>;
// ASIMD store, 3 element, multiple, Q-form
-def : InstRW<[V1Write_5c_3L01_3V01],
+def : InstRW<[V1Write_5c3_1L01_1V01],
(instregex "^ST3Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01],
+def : InstRW<[WriteAdr, V1Write_5c3_1L01_1V01],
(instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, multiple, D-form
-def : InstRW<[V1Write_6c_3L01_3V01],
+def : InstRW<[V1Write_6c6_1L01_1V01],
(instregex "^ST4Fourv(8b|4h|2s)$")>;
-def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
+def : InstRW<[WriteAdr, V1Write_6c6_1L01_1V01],
(instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, B/H/S
-def : InstRW<[V1Write_7c_6L01_6V01],
+def : InstRW<[V1Write_7c12_1L01_1V01],
(instregex "^ST4Fourv(16b|8h|4s)$")>;
-def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01],
+def : InstRW<[WriteAdr, V1Write_7c12_1L01_1V01],
(instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[V1Write_4c_4L01_4V01],
+def : InstRW<[V1Write_4c8_1L01_1V01],
(instrs ST4Fourv2d)>;
-def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01],
+def : InstRW<[WriteAdr, V1Write_4c8_1L01_1V01],
(instrs ST4Fourv2d_POST)>;
// ASIMD store, 4 element, one lane, B/H/S
-def : InstRW<[V1Write_6c_3L_3V],
+def : InstRW<[V1Write_6c2_1L01_1V01],
(instregex "^ST4i(8|16|32)$")>;
-def : InstRW<[WriteAdr, V1Write_6c_3L_3V],
+def : InstRW<[WriteAdr, V1Write_6c2_1L01_1V01],
(instregex "^ST4i(8|16|32)_POST$")>;
@@ -1424,11 +1556,11 @@ def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>;
def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
// Loop control, based on predicate and flag setting
-def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
+def : InstRW<[V1Write_3c2_1M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
BRKPAS_PPzPP, BRKPBS_PPzPP)>;
// Loop control, based on GPR
-def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
+def : InstRW<[V1Write_3c2_1M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
// Loop terminate
def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
@@ -1445,14 +1577,14 @@ def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$"
"^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;
// Predicate counting vector, active predicate
-def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
+def : InstRW<[V1Write_7c2_1M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
// Predicate logical
def : InstRW<[V1Write_1c_1M0],
(instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
// Predicate logical, flag setting
-def : InstRW<[V1Write_2c_2M0],
+def : InstRW<[V1Write_2c2_1M0],
(instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;
// Predicate reverse
@@ -1475,7 +1607,7 @@ def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP,
def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>;
// Predicate set/initialize, set flags
-def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>;
+def : InstRW<[V1Write_3c2_1M0], (instregex "^PTRUES_[BHSD]$")>;
@@ -1495,8 +1627,8 @@ def : InstRW<[V1Write_2c_1V01],
"^SUBR_Z(I|P[mZ]Z)_[BHSD]",
"^(AND|EOR|ORR)_ZI$",
"^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ",
- "^EOR(BT|TB)_ZZZ_[BHSD]$",
- "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>;
+ "^EOR(BT|TB)_ZZZ_[BHSD]$",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>;
// Arithmetic, shift
def : InstRW<[V1Write_2c_1V1],
@@ -1529,14 +1661,15 @@ def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
"^SPLICE_ZPZZ?_[BHSD]$")>;
// Convert to floating point, 64b to float or convert to double
-def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
- "^[SU]CVTF_ZPmZ_StoD")>;
+def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]")>;
+
// Convert to floating point, 32b to single or half
-def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
+def : InstRW<[V1Write_4c2_1V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]",
+ "^[SU]CVTF_ZPmZ_StoD")>;
// Convert to floating point, 16b to half
-def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
+def : InstRW<[V1Write_6c4_1V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
// Copy, scalar
def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
@@ -1545,11 +1678,11 @@ def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
// Divides, 32 bit
-def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+def : InstRW<[V1Write_12c11_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
"^[SU]DIV_ZPZZ_S")>;
// Divides, 64 bit
-def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+def : InstRW<[V1Write_20c20_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
"^[SU]DIV_ZPZZ_D")>;
// Dot product, 8 bit
@@ -1592,10 +1725,10 @@ def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>;
def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
// Horizontal operations, D form, imm, imm
-def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>;
+def : InstRW<[V1Write_5c2_1V0], (instrs INDEX_II_D)>;
// Horizontal operations, D form, scalar, imm / scalar / imm, scalar
-def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+def : InstRW<[V1Write_8c2_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
// Move prefix
def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
@@ -1611,7 +1744,7 @@ def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
"^[SU]MULH_ZPZZ_[BHS]")>;
// Multiply, D element size
-def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+def : InstRW<[V1Write_5c2_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
"^MUL_ZPZZ_D",
"^[SU]MULH_(ZPmZ|ZZZ)_D",
"^[SU]MULH_ZPZZ_D")>;
@@ -1624,29 +1757,29 @@ def : InstRW<[V1Wr_ZMAD, ReadDefault, V1Rd_ZMAD],
// Multiply accumulate, B, H, S element size
// NOTE: This is not specified in the SOG.
-def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
+def : InstRW<[V1Wr_ZMABHS, ReadDefault, V1Rd_ZMABHS], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
// Predicate counting vector
-def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
+def : InstRW<[V1Write_2c_1V01], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
// Reduction, arithmetic, B form
-def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
+def : InstRW<[V1Write_14c2_1V],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
// Reduction, arithmetic, H form
-def : InstRW<[V1Write_12c_1V_1V01_2V1],
+def : InstRW<[V1Write_12c2_1V],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
// Reduction, arithmetic, S form
-def : InstRW<[V1Write_10c_1V_1V01_2V1],
+def : InstRW<[V1Write_10c2_1V1],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
// Reduction, arithmetic, D form
-def : InstRW<[V1Write_8c_1V_1V01],
+def : InstRW<[V1Write_8c2_1V1],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
// Reduction, logical
-def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
+def : InstRW<[V1Write_12c4_1V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
// Reverse, vector
def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
@@ -1684,13 +1817,13 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
"^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
// Floating point associative add, F16
-def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;
+def : InstRW<[V1Write_19c18_1V0], (instrs FADDA_VPZ_H)>;
// Floating point associative add, F32
-def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>;
+def : InstRW<[V1Write_11c10_1V0], (instrs FADDA_VPZ_S)>;
// Floating point associative add, F64
-def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>;
+def : InstRW<[V1Write_8c3_1V01], (instrs FADDA_VPZ_D)>;
// Floating point compare
def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
@@ -1706,29 +1839,29 @@ def : InstRW<[V1Wr_ZFCMA, V1Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
// Floating point convert to integer, F32
-def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
- "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
+def : InstRW<[V1Write_4c2_1V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTZ[SU]_ZPmZ_(StoS|StoD)")>;
// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
// Floating point convert to integer, F64
def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
- "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
+ "^FCVTZ[SU]_ZPmZ_(DtoS|DtoD)")>;
// Floating point convert to integer, F16
-def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
+def : InstRW<[V1Write_6c4_1V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoH|HtoS|HtoD)")>;
// Floating point copy
def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
"^FDUP_ZI_[HSD]$")>;
// Floating point divide, F16
-def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
+def : InstRW<[V1Write_13c12_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
// Floating point divide, F32
-def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
+def : InstRW<[V1Write_10c9_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
// Floating point divide, F64
-def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
+def : InstRW<[V1Write_15c14_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
// Floating point min/max
def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
@@ -1752,10 +1885,10 @@ def : InstRW<[V1Wr_ZFMA, V1Rd_ZFMA],
def : InstRW<[V1Write_4c_1V01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
// Floating point reciprocal estimate, F16
-def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
+def : InstRW<[V1Write_6c_1V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
// Floating point reciprocal estimate, F32
-def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
+def : InstRW<[V1Write_4c_1V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
// Floating point reciprocal estimate, F64
def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
@@ -1764,13 +1897,13 @@ def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>;
// Floating point reduction, F16
-def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
+def : InstRW<[V1Write_13c6_1V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
// Floating point reduction, F32
-def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
+def : InstRW<[V1Write_11c5_1V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
// Floating point reduction, F64
-def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
+def : InstRW<[V1Write_9c4_1V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
// Floating point round to integral, F16
def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
@@ -1782,13 +1915,13 @@ def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V1Write_13c10_1V0], (instregex "^FSQRT_ZPmZ_H")>;
+def : InstRW<[V1Write_13c12_1V0], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V1Write_10c7_1V0], (instregex "^FSQRT_ZPmZ_S")>;
+def : InstRW<[V1Write_10c9_1V0], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V1Write_16c7_1V0], (instregex "^FSQRT_ZPmZ_D")>;
+def : InstRW<[V1Write_16c14_1V0], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric
def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
@@ -1825,9 +1958,9 @@ def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;
// Contiguous load, scalar + scalar
// Contiguous load broadcast, scalar + imm
// Contiguous load broadcast, scalar + scalar
-def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$",
+def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BWD]_IMM$",
"^LD1S?B_[HSD]_IMM$",
- "^LD1S?H_[SD]_IMM$",
+ "^LD1SH_[SD]_IMM$",
"^LD1S?W_D_IMM$",
"^LD1[BWD]$",
"^LD1S?B_[HSD]$",
@@ -1838,8 +1971,11 @@ def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$",
"^LD1RS?H_[SD]_IMM$",
"^LD1RS?W_D_IMM$",
"^LD1RQ_[BHWD]_IMM$",
- "^LD1RQ_[BWD]$")>;
+ "^LD1RQ_[BDW]$")>;
+
def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$",
+ "^LD1H_IMM$",
+ "^LD1H_[SD]_IMM$",
"^LD1S?H_[SD]$",
"^LD1RQ_H$")>;
@@ -1864,42 +2000,47 @@ def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$",
"^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
-def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
+def : InstRW<[V1Write_8c2_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + scalar
-def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>;
-def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>;
+def : InstRW<[V1Write_10c2_1L01_1V01_1S], (instrs LD2H)>;
+def : InstRW<[V1Write_9c2_1L01_1V01], (instregex "^LD2[BWD]$")>;
// Contiguous Load three structures to three vectors, scalar + imm
-def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>;
+def : InstRW<[V1Write_11c6_1L01_1V01], (instregex "^LD3[BHWD]_IMM$")>;
// Contiguous Load three structures to three vectors, scalar + scalar
-def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>;
+def : InstRW<[V1Write_13c6_1L01_1S_1V01], (instregex "^LD3[BHWD]$")>;
// Contiguous Load four structures to four vectors, scalar + imm
-def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
+def : InstRW<[V1Write_12c8_1L01_1V01], (instregex "^LD4[BHWD]_IMM$")>;
// Contiguous Load four structures to four vectors, scalar + scalar
-def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
+def : InstRW<[V1Write_13c8_1L01_1S_1V01], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+def : InstRW<[V1Write_11c12_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
"^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
-def : InstRW<[V1Write_9c_2L_2V],
+def : InstRW<[V1Write_9c6_1L_1V],
(instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
- "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
+ "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
"^GLD(FF)?1D_IMM$",
- "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;
+ "^GLD(FF)?1D(_[SU]XTW)?$")>;
+
// Gather load, 32-bit scaled offset
-def : InstRW<[V1Write_11c_2L_2V],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
- "^GLD(FF)?1W_[SU]XTW_SCALED")>;
+// NOTE: besides the 32-bit scaled-offset forms, this list also matches the
+// scaled forms with a _D suffix (with or without a [US]XTW extension).
+def : InstRW<[V1Write_11c12_1L_1V],
+             (instregex "^GLD(FF)?1D(_[US]XTW)?_SCALED$",
+                        "^GLD(FF)?1S?[HW]_D(_[US]XTW)?_SCALED$",
+                        "^GLD(FF)?1S?H_S_[US]XTW_SCALED$",
+                        "^GLD(FF)?1W_[US]XTW_SCALED$")>;
// Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[V1Write_9c_1L_1V],
+def : InstRW<[V1Write_9c6_1L_1V],
(instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
"^GLD(FF)?1W_[SU]XTW$")>;
@@ -1918,33 +2059,34 @@ def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>;
def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>;
// Contiguous store, scalar + imm
-// Contiguous store, scalar + scalar
def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
"^ST1B_[HSD]_IMM$",
"^ST1H_[SD]_IMM$",
- "^ST1W_D_IMM$",
- "^ST1[BWD]$",
+ "^ST1W_D_IMM$")>;
+// Contiguous store, scalar + scalar
+def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BWD]$",
"^ST1B_[HSD]$",
"^ST1W_D$")>;
+
def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
// Contiguous store two structures from two vectors, scalar + imm
// Contiguous store two structures from two vectors, scalar + scalar
-def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$",
+def : InstRW<[V1Write_4c2_1L01_1V], (instregex "^ST2[BHWD]_IMM$",
"^ST2[BWD]$")>;
def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>;
// Contiguous store three structures from three vectors, scalar + imm
-def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
+def : InstRW<[V1Write_7c9_1L01_1V], (instregex "^ST3[BHWD]_IMM$")>;
// Contiguous store three structures from three vectors, scalar + scalar
-def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>;
+def : InstRW<[V1Write_7c9_1L01_1S_1V], (instregex "^ST3[BHWD]$")>;
// Contiguous store four structures from four vectors, scalar + imm
-def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
+def : InstRW<[V1Write_11c18_1L01_1V], (instregex "^ST4[BHWD]_IMM$")>;
// Contiguous store four structures from four vectors, scalar + scalar
-def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>;
+def : InstRW<[V1Write_11c18_1L01_1S_1V], (instregex "^ST4[BHWD]$")>;
// Non temporal store, scalar + imm
// Non temporal store, scalar + scalar
@@ -1953,29 +2095,34 @@ def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$",
def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
// Scatter store vector + imm 32-bit element size
+def : InstRW<[V1Write_10c8_1L01_1V], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$")>;
+
// Scatter store, 32-bit scaled offset
+def : InstRW<[V1Write_10c8_1L01_1V], (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
// Scatter store, 32-bit unscaled offset
-def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
- "^SST1W_IMM$",
- "^SST1(H_S|W)_[SU]XTW_SCALED$",
- "^SST1[BH]_S_[SU]XTW$",
+def : InstRW<[V1Write_10c8_1L01_1V], (instregex "^SST1[BH]_S_[SU]XTW$",
"^SST1W_[SU]XTW$")>;
// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[V1Write_6c4_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$")>;
+
// Scatter store, 32-bit unpacked scaled offset
-def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
- "^SST1D_[SU]XTW$",
- "^SST1[HW]_D_[SU]XTW_SCALED$",
+def : InstRW<[V1Write_6c4_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
"^SST1D_[SU]XTW_SCALED$")>;
// Scatter store vector + imm 64-bit element size
+def : InstRW<[V1Write_6c4_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$")>;
+
// Scatter store, 64-bit scaled offset
+def : InstRW<[V1Write_6c4_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$")>;
+
// Scatter store, 64-bit unscaled offset
-def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
- "^SST1D_IMM$",
- "^SST1[HW]_D_SCALED$",
- "^SST1D_SCALED$",
- "^SST1[BHW]_D$",
+def : InstRW<[V1Write_6c4_1L01_1V], (instregex "^SST1[BHW]_D$",
"^SST1D$")>;
@@ -1990,10 +2137,10 @@ def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P,
WRFFR)>;
// Read first fault register, predicated
-def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>;
+def : InstRW<[V1Write_3c2_1M0], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
-def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
+def : InstRW<[V1Write_4c6_1M], (instrs RDFFRS_PPz)>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
index 33b76a4f65f05fd..c26c9b94cc9fa82 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -18,6 +18,38 @@ def NeoverseNoLSL : MCSchedPredicate<
CheckAll<[CheckShiftLSL,
CheckShiftBy0]>>;
+// Extend-kind filters. Check64Ext is consumed by RegExtendAndShiftFn below.
+// NOTE(review): no use of Check32Ext is visible in this hunk - confirm.
+// Check32Ext matches when the extend is any of the kinds listed here.
+// Compared with Check64Ext, this list has UXTX and leaves out UXTW.
+def Check32Ext : CheckAny<[CheckExtUXTB, CheckExtUXTH, CheckExtUXTX,
+                           CheckExtSXTB, CheckExtSXTH, CheckExtSXTW,
+                           CheckExtSXTX]>;
+
+// Check64Ext matches when the extend is any of the kinds listed here.
+// Compared with Check32Ext, this list has UXTW and leaves out UXTX.
+// NOTE(review): presumably the omitted kind is the one that leaves a register
+// of that width unchanged - confirm against the intended use.
+def Check64Ext : CheckAny<[CheckExtUXTB, CheckExtUXTH, CheckExtUXTW,
+                           CheckExtSXTB, CheckExtSXTH, CheckExtSXTW,
+                           CheckExtSXTX]>;
+
+// Identify 64-bit arithmetic instructions with an extended register that is
+// also shifted: the opcode is in IsArith64ExtOp, the extend satisfies
+// Check64Ext (UXTX is not in that list) and the shift amount is 1, 2, 3
+// or 4. Every other opcode yields FalsePred.
+def RegExtendAndShiftFn : TIIPredicate<"hasExtendAndShiftReg",
+    MCOpcodeSwitchStatement<
+      [MCOpcodeSwitchCase<
+         IsArith64ExtOp.ValidOpcodes,
+         MCReturnStatement<
+           CheckAll<[
+             Check64Ext,
+             CheckAny<[CheckExtBy1, CheckExtBy2,
+                       CheckExtBy3, CheckExtBy4]>]>>>],
+       MCReturnStatement<FalsePred>>>;
+def RegExtendAndShiftPred : MCSchedPredicate<RegExtendAndShiftFn>;
+
// Identify LDR/STR H/Q-form scaled (and potentially extended) FP instructions
def NeoverseHQForm : MCSchedPredicate<
CheckAll<[
@@ -82,3 +114,14 @@ def NeoverseZeroMove : MCSchedPredicate<
CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
CheckImmOperand<1, 0>]>
]>>;
+
+// Identify a load or store using the register offset addressing mode
+// with a scaled register. Opcodes outside IsLoadStoreRegOffsetOp yield
+// FalsePred.
+def NeoverseScaledIdxFn : TIIPredicate<"isNeoverseScaledAddr",
+    MCOpcodeSwitchStatement<
+      [MCOpcodeSwitchCase<IsLoadStoreRegOffsetOp.ValidOpcodes,
+                          MCReturnStatement<CheckMemScaled>>],
+       MCReturnStatement<FalsePred>>>;
+
+def NeoverseScaledIdxPred : MCSchedPredicate<NeoverseScaledIdxFn>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
index 854d3ce564831db..2872529ab202ded 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -26,7 +26,7 @@ let FunctionMapper = "AArch64_AM::getArithExtendType" in {
}
// Check for shifting in extended arithmetic instructions.
-foreach I = {0-3} in {
+foreach I = {0-4} in {
let FunctionMapper = "AArch64_AM::getArithShiftValue" in
def CheckExtBy#I : CheckImmOperand<3, I>;
}
@@ -91,11 +91,17 @@ def CheckQForm : CheckFunctionPredicateWithTII<
>;
// Identify arithmetic instructions with extend.
-def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
- SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
+def IsArith32ExtOp : CheckOpcode<[ADDWrx, ADDSWrx,
+ SUBWrx, SUBSWrx]>;
+
+def IsArith64ExtOp : CheckOpcode<[ADDXrx, ADDSXrx,
+ SUBXrx, SUBSXrx,
ADDXrx64, ADDSXrx64,
SUBXrx64, SUBSXrx64]>;
+def IsArithExtOp : CheckOpcode<!listconcat(IsArith32ExtOp.ValidOpcodes,
+ IsArith64ExtOp.ValidOpcodes)>;
+
// Identify arithmetic immediate instructions.
def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
SUBWri, SUBXri, SUBSWri, SUBSXri]>;
@@ -276,6 +282,28 @@ def IsCheapLSL : MCSchedPredicate<
CheckShiftBy3,
CheckShiftBy4]>]>>;
+// Check for arith LSL shift <= 4: the shift kind must satisfy CheckShiftLSL
+// and the amount must be one of 0-4.
+// NOTE(review): CheckExtBy<N> reads operand 3 through
+// AArch64_AM::getArithShiftValue (see the foreach that defines it), while
+// CheckShiftLSL (defined outside this hunk) presumably uses the
+// shifted-register decoding - confirm both suit the instructions using this.
+def IsCheapArithLSL : MCSchedPredicate<
+    CheckAll<[CheckShiftLSL,
+              CheckAny<[CheckExtBy0, CheckExtBy1, CheckExtBy2,
+                        CheckExtBy3, CheckExtBy4]>]>>;
+
+// Check if a logical instruction has an operand shifted by a non-zero amount
+// with LSL, LSR or ASR. NOTE(review): ROR is not listed, so a ROR-shifted
+// operand presumably yields false here; confirm that this is intended.
+def hasShiftedOpndFn : TIIPredicate<"hasShiftedOpnd",
+  MCOpcodeSwitchStatement<[MCOpcodeSwitchCase<IsLogicShiftOp.ValidOpcodes,
+       MCReturnStatement<CheckAll<[
+         CheckAny<[CheckShiftLSL, CheckShiftLSR, CheckShiftASR]>,
+         CheckNot<CheckShiftBy0>]>>>],
+     MCReturnStatement<FalsePred>>>;
+def hasShiftedOpndPred : MCSchedPredicate<hasShiftedOpndFn>;
+
// Idioms.
// Identify an instruction that effectively transfers a register to another.
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
index a5330b9d6d2d67e..ab89f98564cbcfb 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
@@ -1392,16 +1392,16 @@ drps
# CHECK-NEXT: 1 1 0.25 sub w4, w20, #546, lsl #12
# CHECK-NEXT: 1 1 0.25 sub sp, sp, #288
# CHECK-NEXT: 1 1 0.25 sub wsp, w19, #16
-# CHECK-NEXT: 1 1 0.33 adds w13, w23, #291, lsl #12
-# CHECK-NEXT: 1 1 0.33 cmn w2, #4095
-# CHECK-NEXT: 1 1 0.33 adds w20, wsp, #0
-# CHECK-NEXT: 1 1 0.33 cmn x3, #1, lsl #12
-# CHECK-NEXT: 1 1 0.33 cmp sp, #20, lsl #12
-# CHECK-NEXT: 1 1 0.33 cmp x30, #4095
-# CHECK-NEXT: 1 1 0.33 subs x4, sp, #3822
-# CHECK-NEXT: 1 1 0.33 cmn w3, #291, lsl #12
-# CHECK-NEXT: 1 1 0.33 cmn wsp, #1365
-# CHECK-NEXT: 1 1 0.33 cmn sp, #1092, lsl #12
+# CHECK-NEXT: 2 2 0.50 adds w13, w23, #291, lsl #12
+# CHECK-NEXT: 2 1 0.33 cmn w2, #4095
+# CHECK-NEXT: 2 1 0.33 adds w20, wsp, #0
+# CHECK-NEXT: 2 2 0.50 cmn x3, #1, lsl #12
+# CHECK-NEXT: 2 2 0.50 cmp sp, #20, lsl #12
+# CHECK-NEXT: 2 1 0.33 cmp x30, #4095
+# CHECK-NEXT: 2 1 0.33 subs x4, sp, #3822
+# CHECK-NEXT: 2 2 0.50 cmn w3, #291, lsl #12
+# CHECK-NEXT: 2 1 0.33 cmn wsp, #1365
+# CHECK-NEXT: 2 2 0.50 cmn sp, #1092, lsl #12
# CHECK-NEXT: 1 1 0.25 mov sp, x30
# CHECK-NEXT: 1 1 0.25 mov wsp, w20
# CHECK-NEXT: 1 1 0.25 mov x11, sp
@@ -1432,32 +1432,32 @@ drps
# CHECK-NEXT: 1 2 0.50 add x2, x3, x4, asr #0
# CHECK-NEXT: 1 2 0.50 add x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 add x8, x9, x10, asr #63
-# CHECK-NEXT: 1 1 0.33 adds w3, w5, w7
-# CHECK-NEXT: 1 1 0.33 cmn w3, w5
-# CHECK-NEXT: 1 1 0.33 adds w20, wzr, w4
-# CHECK-NEXT: 1 1 0.33 adds w4, w6, wzr
-# CHECK-NEXT: 1 1 0.33 adds w11, w13, w15
-# CHECK-NEXT: 1 1 0.33 adds w9, w3, wzr, lsl #1
-# CHECK-NEXT: 1 2 0.50 adds w17, w29, w20, lsl #31
-# CHECK-NEXT: 1 2 0.50 adds w21, w22, w23, lsr #0
-# CHECK-NEXT: 1 2 0.50 adds w24, w25, w26, lsr #18
-# CHECK-NEXT: 1 2 0.50 adds w27, w28, w29, lsr #31
-# CHECK-NEXT: 1 2 0.50 adds w2, w3, w4, asr #0
-# CHECK-NEXT: 1 2 0.50 adds w5, w6, w7, asr #21
-# CHECK-NEXT: 1 2 0.50 adds w8, w9, w10, asr #31
-# CHECK-NEXT: 1 1 0.33 adds x3, x5, x7
-# CHECK-NEXT: 1 1 0.33 cmn x3, x5
-# CHECK-NEXT: 1 1 0.33 adds x20, xzr, x4
-# CHECK-NEXT: 1 1 0.33 adds x4, x6, xzr
-# CHECK-NEXT: 1 1 0.33 adds x11, x13, x15
-# CHECK-NEXT: 1 2 0.50 adds x9, x3, xzr, lsl #10
-# CHECK-NEXT: 1 1 0.33 adds x17, x29, x20, lsl #3
-# CHECK-NEXT: 1 2 0.50 adds x21, x22, x23, lsr #0
-# CHECK-NEXT: 1 2 0.50 adds x24, x25, x26, lsr #18
-# CHECK-NEXT: 1 2 0.50 adds x27, x28, x29, lsr #63
-# CHECK-NEXT: 1 2 0.50 adds x2, x3, x4, asr #0
-# CHECK-NEXT: 1 2 0.50 adds x5, x6, x7, asr #21
-# CHECK-NEXT: 1 2 0.50 adds x8, x9, x10, asr #63
+# CHECK-NEXT: 2 1 0.33 adds w3, w5, w7
+# CHECK-NEXT: 2 1 0.33 cmn w3, w5
+# CHECK-NEXT: 2 1 0.33 adds w20, wzr, w4
+# CHECK-NEXT: 2 1 0.33 adds w4, w6, wzr
+# CHECK-NEXT: 2 1 0.33 adds w11, w13, w15
+# CHECK-NEXT: 2 1 0.33 adds w9, w3, wzr, lsl #1
+# CHECK-NEXT: 2 2 0.50 adds w17, w29, w20, lsl #31
+# CHECK-NEXT: 2 2 0.50 adds w21, w22, w23, lsr #0
+# CHECK-NEXT: 2 2 0.50 adds w24, w25, w26, lsr #18
+# CHECK-NEXT: 2 2 0.50 adds w27, w28, w29, lsr #31
+# CHECK-NEXT: 2 2 0.50 adds w2, w3, w4, asr #0
+# CHECK-NEXT: 2 2 0.50 adds w5, w6, w7, asr #21
+# CHECK-NEXT: 2 2 0.50 adds w8, w9, w10, asr #31
+# CHECK-NEXT: 2 1 0.33 adds x3, x5, x7
+# CHECK-NEXT: 2 1 0.33 cmn x3, x5
+# CHECK-NEXT: 2 1 0.33 adds x20, xzr, x4
+# CHECK-NEXT: 2 1 0.33 adds x4, x6, xzr
+# CHECK-NEXT: 2 1 0.33 adds x11, x13, x15
+# CHECK-NEXT: 2 2 0.50 adds x9, x3, xzr, lsl #10
+# CHECK-NEXT: 2 1 0.33 adds x17, x29, x20, lsl #3
+# CHECK-NEXT: 2 2 0.50 adds x21, x22, x23, lsr #0
+# CHECK-NEXT: 2 2 0.50 adds x24, x25, x26, lsr #18
+# CHECK-NEXT: 2 2 0.50 adds x27, x28, x29, lsr #63
+# CHECK-NEXT: 2 2 0.50 adds x2, x3, x4, asr #0
+# CHECK-NEXT: 2 2 0.50 adds x5, x6, x7, asr #21
+# CHECK-NEXT: 2 2 0.50 adds x8, x9, x10, asr #63
# CHECK-NEXT: 1 1 0.25 sub w3, w5, w7
# CHECK-NEXT: 1 1 0.25 sub wzr, w3, w5
# CHECK-NEXT: 1 1 0.25 sub w4, w6, wzr
@@ -1482,78 +1482,78 @@ drps
# CHECK-NEXT: 1 2 0.50 sub x2, x3, x4, asr #0
# CHECK-NEXT: 1 2 0.50 sub x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 sub x8, x9, x10, asr #63
-# CHECK-NEXT: 1 1 0.33 subs w3, w5, w7
-# CHECK-NEXT: 1 1 0.33 cmp w3, w5
-# CHECK-NEXT: 1 1 0.33 subs w4, w6, wzr
-# CHECK-NEXT: 1 1 0.33 subs w11, w13, w15
-# CHECK-NEXT: 1 1 0.33 subs w9, w3, wzr, lsl #1
-# CHECK-NEXT: 1 2 0.50 subs w17, w29, w20, lsl #31
-# CHECK-NEXT: 1 2 0.50 subs w21, w22, w23, lsr #0
-# CHECK-NEXT: 1 2 0.50 subs w24, w25, w26, lsr #18
-# CHECK-NEXT: 1 2 0.50 subs w27, w28, w29, lsr #31
-# CHECK-NEXT: 1 2 0.50 subs w2, w3, w4, asr #0
-# CHECK-NEXT: 1 2 0.50 subs w5, w6, w7, asr #21
-# CHECK-NEXT: 1 2 0.50 subs w8, w9, w10, asr #31
-# CHECK-NEXT: 1 1 0.33 subs x3, x5, x7
-# CHECK-NEXT: 1 1 0.33 cmp x3, x5
-# CHECK-NEXT: 1 1 0.33 subs x4, x6, xzr
-# CHECK-NEXT: 1 1 0.33 subs x11, x13, x15
-# CHECK-NEXT: 1 2 0.50 subs x9, x3, xzr, lsl #10
-# CHECK-NEXT: 1 1 0.33 subs x17, x29, x20, lsl #3
-# CHECK-NEXT: 1 2 0.50 subs x21, x22, x23, lsr #0
-# CHECK-NEXT: 1 2 0.50 subs x24, x25, x26, lsr #18
-# CHECK-NEXT: 1 2 0.50 subs x27, x28, x29, lsr #63
-# CHECK-NEXT: 1 2 0.50 subs x2, x3, x4, asr #0
-# CHECK-NEXT: 1 2 0.50 subs x5, x6, x7, asr #21
-# CHECK-NEXT: 1 2 0.50 subs x8, x9, x10, asr #63
-# CHECK-NEXT: 1 1 0.33 cmn wzr, w4
-# CHECK-NEXT: 1 1 0.33 cmn w5, wzr
-# CHECK-NEXT: 1 1 0.33 cmn w6, w7
-# CHECK-NEXT: 1 1 0.33 cmn w8, w9, lsl #1
-# CHECK-NEXT: 1 2 0.50 cmn w10, w11, lsl #31
-# CHECK-NEXT: 1 2 0.50 cmn w12, w13, lsr #0
-# CHECK-NEXT: 1 2 0.50 cmn w14, w15, lsr #21
-# CHECK-NEXT: 1 2 0.50 cmn w16, w17, lsr #31
-# CHECK-NEXT: 1 2 0.50 cmn w18, w19, asr #0
-# CHECK-NEXT: 1 2 0.50 cmn w20, w21, asr #22
-# CHECK-NEXT: 1 2 0.50 cmn w22, w23, asr #31
-# CHECK-NEXT: 1 1 0.33 cmn x0, x3
-# CHECK-NEXT: 1 1 0.33 cmn xzr, x4
-# CHECK-NEXT: 1 1 0.33 cmn x5, xzr
-# CHECK-NEXT: 1 1 0.33 cmn x6, x7
-# CHECK-NEXT: 1 2 0.50 cmn x8, x9, lsl #15
-# CHECK-NEXT: 1 1 0.33 cmn x10, x11, lsl #3
-# CHECK-NEXT: 1 2 0.50 cmn x12, x13, lsr #0
-# CHECK-NEXT: 1 2 0.50 cmn x14, x15, lsr #41
-# CHECK-NEXT: 1 2 0.50 cmn x16, x17, lsr #63
-# CHECK-NEXT: 1 2 0.50 cmn x18, x19, asr #0
-# CHECK-NEXT: 1 2 0.50 cmn x20, x21, asr #55
-# CHECK-NEXT: 1 2 0.50 cmn x22, x23, asr #63
-# CHECK-NEXT: 1 1 0.33 cmp w0, w3
-# CHECK-NEXT: 1 1 0.33 cmp wzr, w4
-# CHECK-NEXT: 1 1 0.33 cmp w5, wzr
-# CHECK-NEXT: 1 1 0.33 cmp w6, w7
-# CHECK-NEXT: 1 1 0.33 cmp w8, w9, lsl #1
-# CHECK-NEXT: 1 2 0.50 cmp w10, w11, lsl #31
-# CHECK-NEXT: 1 2 0.50 cmp w12, w13, lsr #0
-# CHECK-NEXT: 1 2 0.50 cmp w14, w15, lsr #21
-# CHECK-NEXT: 1 2 0.50 cmp w18, w19, asr #0
-# CHECK-NEXT: 1 2 0.50 cmp w20, w21, asr #22
-# CHECK-NEXT: 1 2 0.50 cmp w22, w23, asr #31
-# CHECK-NEXT: 1 1 0.33 cmp x0, x3
-# CHECK-NEXT: 1 1 0.33 cmp xzr, x4
-# CHECK-NEXT: 1 1 0.33 cmp x5, xzr
-# CHECK-NEXT: 1 1 0.33 cmp x6, x7
-# CHECK-NEXT: 1 2 0.50 cmp x8, x9, lsl #15
-# CHECK-NEXT: 1 1 0.33 cmp x10, x11, lsl #3
-# CHECK-NEXT: 1 2 0.50 cmp x12, x13, lsr #0
-# CHECK-NEXT: 1 2 0.50 cmp x14, x15, lsr #41
-# CHECK-NEXT: 1 2 0.50 cmp x16, x17, lsr #63
-# CHECK-NEXT: 1 2 0.50 cmp x18, x19, asr #0
-# CHECK-NEXT: 1 2 0.50 cmp x20, x21, asr #55
-# CHECK-NEXT: 1 2 0.50 cmp x22, x23, asr #63
-# CHECK-NEXT: 1 1 0.33 cmp wzr, w0
-# CHECK-NEXT: 1 1 0.33 cmp xzr, x0
+# CHECK-NEXT: 2 1 0.33 subs w3, w5, w7
+# CHECK-NEXT: 2 1 0.33 cmp w3, w5
+# CHECK-NEXT: 2 1 0.33 subs w4, w6, wzr
+# CHECK-NEXT: 2 1 0.33 subs w11, w13, w15
+# CHECK-NEXT: 2 1 0.33 subs w9, w3, wzr, lsl #1
+# CHECK-NEXT: 2 2 0.50 subs w17, w29, w20, lsl #31
+# CHECK-NEXT: 2 2 0.50 subs w21, w22, w23, lsr #0
+# CHECK-NEXT: 2 2 0.50 subs w24, w25, w26, lsr #18
+# CHECK-NEXT: 2 2 0.50 subs w27, w28, w29, lsr #31
+# CHECK-NEXT: 2 2 0.50 subs w2, w3, w4, asr #0
+# CHECK-NEXT: 2 2 0.50 subs w5, w6, w7, asr #21
+# CHECK-NEXT: 2 2 0.50 subs w8, w9, w10, asr #31
+# CHECK-NEXT: 2 1 0.33 subs x3, x5, x7
+# CHECK-NEXT: 2 1 0.33 cmp x3, x5
+# CHECK-NEXT: 2 1 0.33 subs x4, x6, xzr
+# CHECK-NEXT: 2 1 0.33 subs x11, x13, x15
+# CHECK-NEXT: 2 2 0.50 subs x9, x3, xzr, lsl #10
+# CHECK-NEXT: 2 1 0.33 subs x17, x29, x20, lsl #3
+# CHECK-NEXT: 2 2 0.50 subs x21, x22, x23, lsr #0
+# CHECK-NEXT: 2 2 0.50 subs x24, x25, x26, lsr #18
+# CHECK-NEXT: 2 2 0.50 subs x27, x28, x29, lsr #63
+# CHECK-NEXT: 2 2 0.50 subs x2, x3, x4, asr #0
+# CHECK-NEXT: 2 2 0.50 subs x5, x6, x7, asr #21
+# CHECK-NEXT: 2 2 0.50 subs x8, x9, x10, asr #63
+# CHECK-NEXT: 2 1 0.33 cmn wzr, w4
+# CHECK-NEXT: 2 1 0.33 cmn w5, wzr
+# CHECK-NEXT: 2 1 0.33 cmn w6, w7
+# CHECK-NEXT: 2 1 0.33 cmn w8, w9, lsl #1
+# CHECK-NEXT: 2 2 0.50 cmn w10, w11, lsl #31
+# CHECK-NEXT: 2 2 0.50 cmn w12, w13, lsr #0
+# CHECK-NEXT: 2 2 0.50 cmn w14, w15, lsr #21
+# CHECK-NEXT: 2 2 0.50 cmn w16, w17, lsr #31
+# CHECK-NEXT: 2 2 0.50 cmn w18, w19, asr #0
+# CHECK-NEXT: 2 2 0.50 cmn w20, w21, asr #22
+# CHECK-NEXT: 2 2 0.50 cmn w22, w23, asr #31
+# CHECK-NEXT: 2 1 0.33 cmn x0, x3
+# CHECK-NEXT: 2 1 0.33 cmn xzr, x4
+# CHECK-NEXT: 2 1 0.33 cmn x5, xzr
+# CHECK-NEXT: 2 1 0.33 cmn x6, x7
+# CHECK-NEXT: 2 2 0.50 cmn x8, x9, lsl #15
+# CHECK-NEXT: 2 1 0.33 cmn x10, x11, lsl #3
+# CHECK-NEXT: 2 2 0.50 cmn x12, x13, lsr #0
+# CHECK-NEXT: 2 2 0.50 cmn x14, x15, lsr #41
+# CHECK-NEXT: 2 2 0.50 cmn x16, x17, lsr #63
+# CHECK-NEXT: 2 2 0.50 cmn x18, x19, asr #0
+# CHECK-NEXT: 2 2 0.50 cmn x20, x21, asr #55
+# CHECK-NEXT: 2 2 0.50 cmn x22, x23, asr #63
+# CHECK-NEXT: 2 1 0.33 cmp w0, w3
+# CHECK-NEXT: 2 1 0.33 cmp wzr, w4
+# CHECK-NEXT: 2 1 0.33 cmp w5, wzr
+# CHECK-NEXT: 2 1 0.33 cmp w6, w7
+# CHECK-NEXT: 2 1 0.33 cmp w8, w9, lsl #1
+# CHECK-NEXT: 2 2 0.50 cmp w10, w11, lsl #31
+# CHECK-NEXT: 2 2 0.50 cmp w12, w13, lsr #0
+# CHECK-NEXT: 2 2 0.50 cmp w14, w15, lsr #21
+# CHECK-NEXT: 2 2 0.50 cmp w18, w19, asr #0
+# CHECK-NEXT: 2 2 0.50 cmp w20, w21, asr #22
+# CHECK-NEXT: 2 2 0.50 cmp w22, w23, asr #31
+# CHECK-NEXT: 2 1 0.33 cmp x0, x3
+# CHECK-NEXT: 2 1 0.33 cmp xzr, x4
+# CHECK-NEXT: 2 1 0.33 cmp x5, xzr
+# CHECK-NEXT: 2 1 0.33 cmp x6, x7
+# CHECK-NEXT: 2 2 0.50 cmp x8, x9, lsl #15
+# CHECK-NEXT: 2 1 0.33 cmp x10, x11, lsl #3
+# CHECK-NEXT: 2 2 0.50 cmp x12, x13, lsr #0
+# CHECK-NEXT: 2 2 0.50 cmp x14, x15, lsr #41
+# CHECK-NEXT: 2 2 0.50 cmp x16, x17, lsr #63
+# CHECK-NEXT: 2 2 0.50 cmp x18, x19, asr #0
+# CHECK-NEXT: 2 2 0.50 cmp x20, x21, asr #55
+# CHECK-NEXT: 2 2 0.50 cmp x22, x23, asr #63
+# CHECK-NEXT: 2 1 0.33 cmp wzr, w0
+# CHECK-NEXT: 2 1 0.33 cmp xzr, x0
# CHECK-NEXT: 1 1 0.25 adc w29, w27, w25
# CHECK-NEXT: 1 1 0.25 adc wzr, w3, w4
# CHECK-NEXT: 1 1 0.25 adc w9, wzr, w10
@@ -1562,14 +1562,14 @@ drps
# CHECK-NEXT: 1 1 0.25 adc xzr, x3, x4
# CHECK-NEXT: 1 1 0.25 adc x9, xzr, x10
# CHECK-NEXT: 1 1 0.25 adc x20, x0, xzr
-# CHECK-NEXT: 1 1 0.33 adcs w29, w27, w25
-# CHECK-NEXT: 1 1 0.33 adcs wzr, w3, w4
-# CHECK-NEXT: 1 1 0.33 adcs w9, wzr, w10
-# CHECK-NEXT: 1 1 0.33 adcs w20, w0, wzr
-# CHECK-NEXT: 1 1 0.33 adcs x29, x27, x25
-# CHECK-NEXT: 1 1 0.33 adcs xzr, x3, x4
-# CHECK-NEXT: 1 1 0.33 adcs x9, xzr, x10
-# CHECK-NEXT: 1 1 0.33 adcs x20, x0, xzr
+# CHECK-NEXT: 2 1 0.33 adcs w29, w27, w25
+# CHECK-NEXT: 2 1 0.33 adcs wzr, w3, w4
+# CHECK-NEXT: 2 1 0.33 adcs w9, wzr, w10
+# CHECK-NEXT: 2 1 0.33 adcs w20, w0, wzr
+# CHECK-NEXT: 2 1 0.33 adcs x29, x27, x25
+# CHECK-NEXT: 2 1 0.33 adcs xzr, x3, x4
+# CHECK-NEXT: 2 1 0.33 adcs x9, xzr, x10
+# CHECK-NEXT: 2 1 0.33 adcs x20, x0, xzr
# CHECK-NEXT: 1 1 0.25 sbc w29, w27, w25
# CHECK-NEXT: 1 1 0.25 sbc wzr, w3, w4
# CHECK-NEXT: 1 1 0.25 ngc w9, w10
@@ -1578,26 +1578,26 @@ drps
# CHECK-NEXT: 1 1 0.25 sbc xzr, x3, x4
# CHECK-NEXT: 1 1 0.25 ngc x9, x10
# CHECK-NEXT: 1 1 0.25 sbc x20, x0, xzr
-# CHECK-NEXT: 1 1 0.33 sbcs w29, w27, w25
-# CHECK-NEXT: 1 1 0.33 sbcs wzr, w3, w4
-# CHECK-NEXT: 1 1 0.33 ngcs w9, w10
-# CHECK-NEXT: 1 1 0.33 sbcs w20, w0, wzr
-# CHECK-NEXT: 1 1 0.33 sbcs x29, x27, x25
-# CHECK-NEXT: 1 1 0.33 sbcs xzr, x3, x4
-# CHECK-NEXT: 1 1 0.33 ngcs x9, x10
-# CHECK-NEXT: 1 1 0.33 sbcs x20, x0, xzr
+# CHECK-NEXT: 2 1 0.33 sbcs w29, w27, w25
+# CHECK-NEXT: 2 1 0.33 sbcs wzr, w3, w4
+# CHECK-NEXT: 2 1 0.33 ngcs w9, w10
+# CHECK-NEXT: 2 1 0.33 sbcs w20, w0, wzr
+# CHECK-NEXT: 2 1 0.33 sbcs x29, x27, x25
+# CHECK-NEXT: 2 1 0.33 sbcs xzr, x3, x4
+# CHECK-NEXT: 2 1 0.33 ngcs x9, x10
+# CHECK-NEXT: 2 1 0.33 sbcs x20, x0, xzr
# CHECK-NEXT: 1 1 0.25 ngc w3, w12
# CHECK-NEXT: 1 1 0.25 ngc wzr, w9
# CHECK-NEXT: 1 1 0.25 ngc w23, wzr
# CHECK-NEXT: 1 1 0.25 ngc x29, x30
# CHECK-NEXT: 1 1 0.25 ngc xzr, x0
# CHECK-NEXT: 1 1 0.25 ngc x0, xzr
-# CHECK-NEXT: 1 1 0.33 ngcs w3, w12
-# CHECK-NEXT: 1 1 0.33 ngcs wzr, w9
-# CHECK-NEXT: 1 1 0.33 ngcs w23, wzr
-# CHECK-NEXT: 1 1 0.33 ngcs x29, x30
-# CHECK-NEXT: 1 1 0.33 ngcs xzr, x0
-# CHECK-NEXT: 1 1 0.33 ngcs x0, xzr
+# CHECK-NEXT: 2 1 0.33 ngcs w3, w12
+# CHECK-NEXT: 2 1 0.33 ngcs wzr, w9
+# CHECK-NEXT: 2 1 0.33 ngcs w23, wzr
+# CHECK-NEXT: 2 1 0.33 ngcs x29, x30
+# CHECK-NEXT: 2 1 0.33 ngcs xzr, x0
+# CHECK-NEXT: 2 1 0.33 ngcs x0, xzr
# CHECK-NEXT: 1 1 0.25 sbfx x1, x2, #3, #2
# CHECK-NEXT: 1 1 0.25 asr x3, x4, #63
# CHECK-NEXT: 1 1 0.25 asr wzr, wzr, #31
@@ -1784,10 +1784,10 @@ drps
# CHECK-NEXT: 1 1 0.25 clz x26, x4
# CHECK-NEXT: 1 1 0.25 cls w3, w5
# CHECK-NEXT: 1 1 0.25 cls x20, x5
-# CHECK-NEXT: 1 12 5.00 udiv w0, w7, w10
-# CHECK-NEXT: 1 20 5.00 udiv x9, x22, x4
-# CHECK-NEXT: 1 12 5.00 sdiv w12, w21, w0
-# CHECK-NEXT: 1 20 5.00 sdiv x13, x2, x1
+# CHECK-NEXT: 1 12 12.00 udiv w0, w7, w10
+# CHECK-NEXT: 1 20 20.00 udiv x9, x22, x4
+# CHECK-NEXT: 1 12 12.00 sdiv w12, w21, w0
+# CHECK-NEXT: 1 20 20.00 sdiv x13, x2, x1
# CHECK-NEXT: 1 1 0.25 lsl w11, w12, w13
# CHECK-NEXT: 1 1 0.25 lsl x14, x15, x16
# CHECK-NEXT: 1 1 0.25 lsr w17, w18, w19
@@ -1855,10 +1855,10 @@ drps
# CHECK-NEXT: 1 2 0.50 umull x11, w13, w17
# CHECK-NEXT: 1 2 0.50 smnegl x11, w13, w17
# CHECK-NEXT: 1 2 0.50 umnegl x11, w13, w17
-# CHECK-NEXT: 2 3 0.50 extr w3, w5, w7, #0
-# CHECK-NEXT: 2 3 0.50 extr w11, w13, w17, #31
-# CHECK-NEXT: 2 3 0.50 extr x3, x5, x7, #15
-# CHECK-NEXT: 2 3 0.50 extr x11, x13, x17, #63
+# CHECK-NEXT: 1 3 0.50 extr w3, w5, w7, #0
+# CHECK-NEXT: 1 3 0.50 extr w11, w13, w17, #31
+# CHECK-NEXT: 1 3 0.50 extr x3, x5, x7, #15
+# CHECK-NEXT: 1 3 0.50 extr x11, x13, x17, #63
# CHECK-NEXT: 1 1 0.25 ror x19, x23, #24
# CHECK-NEXT: 1 1 0.25 ror x29, xzr, #63
# CHECK-NEXT: 1 1 0.25 ror w9, w13, #31
@@ -1891,7 +1891,7 @@ drps
# CHECK-NEXT: 1 2 0.25 fmov s0, s1
# CHECK-NEXT: 1 2 0.25 fabs s2, s3
# CHECK-NEXT: 1 2 0.25 fneg s4, s5
-# CHECK-NEXT: 1 10 3.50 fsqrt s6, s7
+# CHECK-NEXT: 1 9 1.00 fsqrt s6, s7
# CHECK-NEXT: 1 3 0.50 fcvt d8, s9
# CHECK-NEXT: 1 3 0.50 fcvt h10, s11
# CHECK-NEXT: 1 3 0.50 frintn s12, s13
@@ -1904,7 +1904,7 @@ drps
# CHECK-NEXT: 1 2 0.25 fmov d0, d1
# CHECK-NEXT: 1 2 0.25 fabs d2, d3
# CHECK-NEXT: 1 2 0.25 fneg d4, d5
-# CHECK-NEXT: 1 16 3.50 fsqrt d6, d7
+# CHECK-NEXT: 1 16 4.00 fsqrt d6, d7
# CHECK-NEXT: 1 3 0.50 fcvt s8, d9
# CHECK-NEXT: 1 3 0.50 fcvt h10, d11
# CHECK-NEXT: 1 3 0.50 frintn d12, d13
@@ -1917,7 +1917,7 @@ drps
# CHECK-NEXT: 1 3 0.50 fcvt s26, h27
# CHECK-NEXT: 1 3 0.50 fcvt d28, h29
# CHECK-NEXT: 1 3 0.25 fmul s20, s19, s17
-# CHECK-NEXT: 1 10 3.50 fdiv s1, s2, s3
+# CHECK-NEXT: 1 10 1.50 fdiv s1, s2, s3
# CHECK-NEXT: 1 2 0.25 fadd s4, s5, s6
# CHECK-NEXT: 1 2 0.25 fsub s7, s8, s9
# CHECK-NEXT: 1 2 0.25 fmax s10, s11, s12
@@ -1942,42 +1942,42 @@ drps
# CHECK-NEXT: 1 4 0.25 fnmadd d3, d13, d0, d23
# CHECK-NEXT: 1 4 0.25 fnmsub s3, s5, s6, s31
# CHECK-NEXT: 1 4 0.25 fnmsub d3, d13, d0, d23
-# CHECK-NEXT: 1 3 0.50 fcvtzs w3, h5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzs wzr, h20, #13
-# CHECK-NEXT: 1 3 0.50 fcvtzs w19, h0, #32
-# CHECK-NEXT: 1 3 0.50 fcvtzs x3, h5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzs x12, h30, #45
-# CHECK-NEXT: 1 3 0.50 fcvtzs x19, h0, #64
-# CHECK-NEXT: 1 3 0.50 fcvtzs w3, s5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzs wzr, s20, #13
-# CHECK-NEXT: 1 3 0.50 fcvtzs w19, s0, #32
-# CHECK-NEXT: 1 3 0.50 fcvtzs x3, s5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzs x12, s30, #45
-# CHECK-NEXT: 1 3 0.50 fcvtzs x19, s0, #64
-# CHECK-NEXT: 1 3 0.50 fcvtzs w3, d5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzs wzr, d20, #13
-# CHECK-NEXT: 1 3 0.50 fcvtzs w19, d0, #32
-# CHECK-NEXT: 1 3 0.50 fcvtzs x3, d5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzs x12, d30, #45
-# CHECK-NEXT: 1 3 0.50 fcvtzs x19, d0, #64
-# CHECK-NEXT: 1 3 0.50 fcvtzu w3, h5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzu wzr, h20, #13
-# CHECK-NEXT: 1 3 0.50 fcvtzu w19, h0, #32
-# CHECK-NEXT: 1 3 0.50 fcvtzu x3, h5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzu x12, h30, #45
-# CHECK-NEXT: 1 3 0.50 fcvtzu x19, h0, #64
-# CHECK-NEXT: 1 3 0.50 fcvtzu w3, s5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzu wzr, s20, #13
-# CHECK-NEXT: 1 3 0.50 fcvtzu w19, s0, #32
-# CHECK-NEXT: 1 3 0.50 fcvtzu x3, s5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzu x12, s30, #45
-# CHECK-NEXT: 1 3 0.50 fcvtzu x19, s0, #64
-# CHECK-NEXT: 1 3 0.50 fcvtzu w3, d5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzu wzr, d20, #13
-# CHECK-NEXT: 1 3 0.50 fcvtzu w19, d0, #32
-# CHECK-NEXT: 1 3 0.50 fcvtzu x3, d5, #1
-# CHECK-NEXT: 1 3 0.50 fcvtzu x12, d30, #45
-# CHECK-NEXT: 1 3 0.50 fcvtzu x19, d0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, h20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, h0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, h30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, h0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, s20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, s0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, s30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, s0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, d20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, d0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, d30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, d0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, h20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, h0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, h30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, h0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, s20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, s0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, s30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, s0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, d20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, d0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, d30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, d0, #64
# CHECK-NEXT: 1 3 1.00 scvtf h23, w19, #1
# CHECK-NEXT: 1 3 1.00 scvtf h31, wzr, #20
# CHECK-NEXT: 1 3 1.00 scvtf h14, w0, #32
@@ -2182,9 +2182,9 @@ drps
# CHECK-NEXT: 3 1 0.50 * str w19, [sp], #255
# CHECK-NEXT: 3 1 0.50 * str w20, [x30], #1
# CHECK-NEXT: 3 1 0.50 * str w21, [x12], #-256
-# CHECK-NEXT: 3 1 0.50 * str xzr, [x9], #255
-# CHECK-NEXT: 3 1 0.50 * str x2, [x3], #1
-# CHECK-NEXT: 3 1 0.50 * str x19, [x12], #-256
+# CHECK-NEXT: 2 1 0.50 * str xzr, [x9], #255
+# CHECK-NEXT: 2 1 0.50 * str x2, [x3], #1
+# CHECK-NEXT: 2 1 0.50 * str x19, [x12], #-256
# CHECK-NEXT: 2 4 0.33 * ldrb w9, [x2], #255
# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3], #1
# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3], #-256
@@ -2212,18 +2212,18 @@ drps
# CHECK-NEXT: 2 4 0.33 * ldrsh wzr, [x9], #255
# CHECK-NEXT: 2 4 0.33 * ldrsh w2, [x3], #1
# CHECK-NEXT: 2 4 0.33 * ldrsh w19, [x12], #-256
-# CHECK-NEXT: 3 2 0.50 * str b0, [x0], #255
-# CHECK-NEXT: 3 2 0.50 * str b3, [x3], #1
-# CHECK-NEXT: 3 2 0.50 * str b5, [sp], #-256
-# CHECK-NEXT: 3 2 0.50 * str h10, [x10], #255
-# CHECK-NEXT: 3 2 0.50 * str h13, [x23], #1
-# CHECK-NEXT: 3 2 0.50 * str h15, [sp], #-256
-# CHECK-NEXT: 3 2 0.50 * str s20, [x20], #255
-# CHECK-NEXT: 3 2 0.50 * str s23, [x23], #1
-# CHECK-NEXT: 3 2 0.50 * str s25, [x0], #-256
-# CHECK-NEXT: 3 2 0.50 * str d20, [x20], #255
-# CHECK-NEXT: 3 2 0.50 * str d23, [x23], #1
-# CHECK-NEXT: 3 2 0.50 * str d25, [x0], #-256
+# CHECK-NEXT: 2 2 0.50 * str b0, [x0], #255
+# CHECK-NEXT: 2 2 0.50 * str b3, [x3], #1
+# CHECK-NEXT: 2 2 0.50 * str b5, [sp], #-256
+# CHECK-NEXT: 2 2 0.50 * str h10, [x10], #255
+# CHECK-NEXT: 2 2 0.50 * str h13, [x23], #1
+# CHECK-NEXT: 2 2 0.50 * str h15, [sp], #-256
+# CHECK-NEXT: 2 2 0.50 * str s20, [x20], #255
+# CHECK-NEXT: 2 2 0.50 * str s23, [x23], #1
+# CHECK-NEXT: 2 2 0.50 * str s25, [x0], #-256
+# CHECK-NEXT: 2 2 0.50 * str d20, [x20], #255
+# CHECK-NEXT: 2 2 0.50 * str d23, [x23], #1
+# CHECK-NEXT: 2 2 0.50 * str d25, [x0], #-256
# CHECK-NEXT: 2 6 0.33 * ldr b0, [x0], #255
# CHECK-NEXT: 2 6 0.33 * ldr b3, [x3], #1
# CHECK-NEXT: 2 6 0.33 * ldr b5, [sp], #-256
@@ -2239,9 +2239,9 @@ drps
# CHECK-NEXT: 2 6 0.33 * ldr q20, [x1], #255
# CHECK-NEXT: 2 6 0.33 * ldr q23, [x9], #1
# CHECK-NEXT: 2 6 0.33 * ldr q25, [x20], #-256
-# CHECK-NEXT: 3 2 0.50 * str q10, [x1], #255
-# CHECK-NEXT: 3 2 0.50 * str q22, [sp], #1
-# CHECK-NEXT: 3 2 0.50 * str q21, [x20], #-256
+# CHECK-NEXT: 2 2 0.50 * str q10, [x1], #255
+# CHECK-NEXT: 2 2 0.50 * str q22, [sp], #1
+# CHECK-NEXT: 2 2 0.50 * str q21, [x20], #-256
# CHECK-NEXT: 2 4 0.33 * ldr x3, [x4, #0]!
# CHECK-NEXT: 3 1 0.50 * strb w9, [x2, #255]!
# CHECK-NEXT: 3 1 0.50 * strb w10, [x3, #1]!
@@ -2252,9 +2252,9 @@ drps
# CHECK-NEXT: 3 1 0.50 * str w19, [sp, #255]!
# CHECK-NEXT: 3 1 0.50 * str w20, [x30, #1]!
# CHECK-NEXT: 3 1 0.50 * str w21, [x12, #-256]!
-# CHECK-NEXT: 3 1 0.50 * str xzr, [x9, #255]!
-# CHECK-NEXT: 3 1 0.50 * str x2, [x3, #1]!
-# CHECK-NEXT: 3 1 0.50 * str x19, [x12, #-256]!
+# CHECK-NEXT: 2 1 0.50 * str xzr, [x9, #255]!
+# CHECK-NEXT: 2 1 0.50 * str x2, [x3, #1]!
+# CHECK-NEXT: 2 1 0.50 * str x19, [x12, #-256]!
# CHECK-NEXT: 2 4 0.33 * ldrb w9, [x2, #255]!
# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3, #1]!
# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3, #-256]!
@@ -2282,18 +2282,18 @@ drps
# CHECK-NEXT: 2 4 0.33 * ldrsh wzr, [x9, #255]!
# CHECK-NEXT: 2 4 0.33 * ldrsh w2, [x3, #1]!
# CHECK-NEXT: 2 4 0.33 * ldrsh w19, [x12, #-256]!
-# CHECK-NEXT: 3 2 0.50 * str b0, [x0, #255]!
-# CHECK-NEXT: 3 2 0.50 * str b3, [x3, #1]!
-# CHECK-NEXT: 3 2 0.50 * str b5, [sp, #-256]!
-# CHECK-NEXT: 3 2 0.50 * str h10, [x10, #255]!
-# CHECK-NEXT: 3 2 0.50 * str h13, [x23, #1]!
-# CHECK-NEXT: 3 2 0.50 * str h15, [sp, #-256]!
-# CHECK-NEXT: 3 2 0.50 * str s20, [x20, #255]!
-# CHECK-NEXT: 3 2 0.50 * str s23, [x23, #1]!
-# CHECK-NEXT: 3 2 0.50 * str s25, [x0, #-256]!
-# CHECK-NEXT: 3 2 0.50 * str d20, [x20, #255]!
-# CHECK-NEXT: 3 2 0.50 * str d23, [x23, #1]!
-# CHECK-NEXT: 3 2 0.50 * str d25, [x0, #-256]!
+# CHECK-NEXT: 2 2 0.50 * str b0, [x0, #255]!
+# CHECK-NEXT: 2 2 0.50 * str b3, [x3, #1]!
+# CHECK-NEXT: 2 2 0.50 * str b5, [sp, #-256]!
+# CHECK-NEXT: 2 2 0.50 * str h10, [x10, #255]!
+# CHECK-NEXT: 2 2 0.50 * str h13, [x23, #1]!
+# CHECK-NEXT: 2 2 0.50 * str h15, [sp, #-256]!
+# CHECK-NEXT: 2 2 0.50 * str s20, [x20, #255]!
+# CHECK-NEXT: 2 2 0.50 * str s23, [x23, #1]!
+# CHECK-NEXT: 2 2 0.50 * str s25, [x0, #-256]!
+# CHECK-NEXT: 2 2 0.50 * str d20, [x20, #255]!
+# CHECK-NEXT: 2 2 0.50 * str d23, [x23, #1]!
+# CHECK-NEXT: 2 2 0.50 * str d25, [x0, #-256]!
# CHECK-NEXT: 2 6 0.33 * ldr b0, [x0, #255]!
# CHECK-NEXT: 2 6 0.33 * ldr b3, [x3, #1]!
# CHECK-NEXT: 2 6 0.33 * ldr b5, [sp, #-256]!
@@ -2309,9 +2309,9 @@ drps
# CHECK-NEXT: 2 6 0.33 * ldr q20, [x1, #255]!
# CHECK-NEXT: 2 6 0.33 * ldr q23, [x9, #1]!
# CHECK-NEXT: 2 6 0.33 * ldr q25, [x20, #-256]!
-# CHECK-NEXT: 3 2 0.50 * str q10, [x1, #255]!
-# CHECK-NEXT: 3 2 0.50 * str q22, [sp, #1]!
-# CHECK-NEXT: 3 2 0.50 * str q21, [x20, #-256]!
+# CHECK-NEXT: 2 2 0.50 * str q10, [x1, #255]!
+# CHECK-NEXT: 2 2 0.50 * str q22, [sp, #1]!
+# CHECK-NEXT: 2 2 0.50 * str q21, [x20, #-256]!
# CHECK-NEXT: 2 1 0.50 * sttrb w9, [sp]
# CHECK-NEXT: 2 1 0.50 * sttrh wzr, [x12, #255]
# CHECK-NEXT: 2 1 0.50 * sttr w16, [x0, #-256]
@@ -2363,16 +2363,16 @@ drps
# CHECK-NEXT: 1 4 0.33 * ldrsb x18, [x22, w10, sxtw]
# CHECK-NEXT: 1 4 0.33 * ldrsh w3, [sp, x5]
# CHECK-NEXT: 1 4 0.33 * ldrsh w9, [x27, x6]
-# CHECK-NEXT: 1 4 0.33 * ldrh w10, [x30, x7, lsl #1]
+# CHECK-NEXT: 2 5 0.33 * ldrh w10, [x30, x7, lsl #1]
# CHECK-NEXT: 2 1 0.50 * strh w11, [x29, x3, sxtx]
# CHECK-NEXT: 1 4 0.33 * ldrh w12, [x28, xzr, sxtx]
-# CHECK-NEXT: 1 4 0.33 * ldrsh x13, [x27, x5, sxtx #1]
+# CHECK-NEXT: 2 5 0.33 * ldrsh x13, [x27, x5, sxtx #1]
# CHECK-NEXT: 1 4 0.33 * ldrh w14, [x26, w6, uxtw]
# CHECK-NEXT: 1 4 0.33 * ldrh w15, [x25, w7, uxtw]
-# CHECK-NEXT: 1 4 0.33 * ldrsh w16, [x24, w8, uxtw #1]
+# CHECK-NEXT: 2 5 0.33 * ldrsh w16, [x24, w8, uxtw #1]
# CHECK-NEXT: 1 4 0.33 * ldrh w17, [x23, w9, sxtw]
# CHECK-NEXT: 1 4 0.33 * ldrh w18, [x22, w10, sxtw]
-# CHECK-NEXT: 2 1 0.50 * strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: 3 2 0.50 * strh w19, [x21, wzr, sxtw #1]
# CHECK-NEXT: 1 4 0.33 * ldr w3, [sp, x5]
# CHECK-NEXT: 1 6 0.33 * ldr s9, [x27, x6]
# CHECK-NEXT: 1 4 0.33 * ldr w10, [x30, x7, lsl #2]
@@ -2397,28 +2397,28 @@ drps
# CHECK-NEXT: 1 4 0.33 * ldr x17, [x23, w9, sxtw]
# CHECK-NEXT: 1 4 0.33 * ldr x18, [x22, w10, sxtw]
# CHECK-NEXT: 2 2 0.50 * str d19, [x21, wzr, sxtw #3]
-# CHECK-NEXT: 2 7 0.33 * ldr q3, [sp, x5]
-# CHECK-NEXT: 2 7 0.33 * ldr q9, [x27, x6]
+# CHECK-NEXT: 1 6 0.33 * ldr q3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr q9, [x27, x6]
# CHECK-NEXT: 2 7 0.33 * ldr q10, [x30, x7, lsl #4]
-# CHECK-NEXT: 3 2 0.50 * str q11, [x29, x3, sxtx]
-# CHECK-NEXT: 3 2 0.50 * str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str q11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str q12, [x28, xzr, sxtx]
# CHECK-NEXT: 3 2 0.50 * str q13, [x27, x5, sxtx #4]
-# CHECK-NEXT: 2 7 0.33 * ldr q14, [x26, w6, uxtw]
-# CHECK-NEXT: 2 7 0.33 * ldr q15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr q14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr q15, [x25, w7, uxtw]
# CHECK-NEXT: 2 7 0.33 * ldr q16, [x24, w8, uxtw #4]
-# CHECK-NEXT: 2 7 0.33 * ldr q17, [x23, w9, sxtw]
-# CHECK-NEXT: 3 2 0.50 * str q18, [x22, w10, sxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr q17, [x23, w9, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str q18, [x22, w10, sxtw]
# CHECK-NEXT: 2 7 0.33 * ldr q19, [x21, wzr, sxtw #4]
# CHECK-NEXT: 1 4 0.33 * ldp w3, w5, [sp]
# CHECK-NEXT: 2 1 0.50 * stp wzr, w9, [sp, #252]
# CHECK-NEXT: 1 4 0.33 * ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: 1 4 0.33 * ldp w9, w10, [sp, #4]
-# CHECK-NEXT: 2 5 0.33 * ldpsw x9, x10, [sp, #4]
-# CHECK-NEXT: 2 5 0.33 * ldpsw x9, x10, [x2, #-256]
-# CHECK-NEXT: 2 5 0.33 * ldpsw x20, x30, [sp, #252]
-# CHECK-NEXT: 2 4 0.67 * ldp x21, x29, [x2, #504]
-# CHECK-NEXT: 2 4 0.67 * ldp x22, x23, [x3, #-512]
-# CHECK-NEXT: 2 4 0.67 * ldp x24, x25, [x4, #8]
+# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [sp, #4]
+# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [x2, #-256]
+# CHECK-NEXT: 2 5 1.00 * ldpsw x20, x30, [sp, #252]
+# CHECK-NEXT: 1 4 1.00 * ldp x21, x29, [x2, #504]
+# CHECK-NEXT: 1 4 1.00 * ldp x22, x23, [x3, #-512]
+# CHECK-NEXT: 1 4 1.00 * ldp x24, x25, [x4, #8]
# CHECK-NEXT: 1 6 0.33 * ldp s29, s28, [sp, #252]
# CHECK-NEXT: 2 2 0.50 * stp s27, s26, [sp, #-256]
# CHECK-NEXT: 1 6 0.33 * ldp s1, s2, [x3, #44]
@@ -2427,52 +2427,52 @@ drps
# CHECK-NEXT: 1 6 0.33 * ldp d2, d3, [x30, #-8]
# CHECK-NEXT: 2 2 0.50 * stp q3, q5, [sp]
# CHECK-NEXT: 2 2 0.50 * stp q17, q19, [sp, #1008]
-# CHECK-NEXT: 2 6 0.67 * ldp q23, q29, [x1, #-1024]
+# CHECK-NEXT: 1 6 0.67 * ldp q23, q29, [x1, #-1024]
# CHECK-NEXT: 2 4 0.33 * ldp w3, w5, [sp], #0
# CHECK-NEXT: 3 1 0.50 * stp wzr, w9, [sp], #252
# CHECK-NEXT: 2 4 0.33 * ldp w2, wzr, [sp], #-256
# CHECK-NEXT: 2 4 0.33 * ldp w9, w10, [sp], #4
-# CHECK-NEXT: 3 5 0.50 * ldpsw x9, x10, [sp], #4
-# CHECK-NEXT: 3 5 0.50 * ldpsw x9, x10, [x2], #-256
-# CHECK-NEXT: 3 5 0.50 * ldpsw x20, x30, [sp], #252
-# CHECK-NEXT: 3 4 0.67 * ldp x21, x29, [x2], #504
-# CHECK-NEXT: 3 4 0.67 * ldp x22, x23, [x3], #-512
-# CHECK-NEXT: 3 4 0.67 * ldp x24, x25, [x4], #8
+# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [sp], #4
+# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [x2], #-256
+# CHECK-NEXT: 2 5 1.00 * ldpsw x20, x30, [sp], #252
+# CHECK-NEXT: 2 4 1.00 * ldp x21, x29, [x2], #504
+# CHECK-NEXT: 2 4 1.00 * ldp x22, x23, [x3], #-512
+# CHECK-NEXT: 2 4 1.00 * ldp x24, x25, [x4], #8
# CHECK-NEXT: 2 6 0.33 * ldp s29, s28, [sp], #252
# CHECK-NEXT: 3 2 0.50 * stp s27, s26, [sp], #-256
# CHECK-NEXT: 2 6 0.33 * ldp s1, s2, [x3], #44
# CHECK-NEXT: 3 2 0.50 * stp d3, d5, [x9], #504
# CHECK-NEXT: 3 2 0.50 * stp d7, d11, [x10], #-512
# CHECK-NEXT: 2 6 0.33 * ldp d2, d3, [x30], #-8
-# CHECK-NEXT: 4 2 1.00 * stp q3, q5, [sp], #0
-# CHECK-NEXT: 4 2 1.00 * stp q17, q19, [sp], #1008
-# CHECK-NEXT: 3 6 0.67 * ldp q23, q29, [x1], #-1024
+# CHECK-NEXT: 3 2 1.00 * stp q3, q5, [sp], #0
+# CHECK-NEXT: 3 2 1.00 * stp q17, q19, [sp], #1008
+# CHECK-NEXT: 2 6 0.67 * ldp q23, q29, [x1], #-1024
# CHECK-NEXT: 2 4 0.33 * ldp w3, w5, [sp, #0]!
# CHECK-NEXT: 3 1 0.50 * stp wzr, w9, [sp, #252]!
# CHECK-NEXT: 2 4 0.33 * ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: 2 4 0.33 * ldp w9, w10, [sp, #4]!
-# CHECK-NEXT: 3 5 0.50 * ldpsw x9, x10, [sp, #4]!
-# CHECK-NEXT: 3 5 0.50 * ldpsw x9, x10, [x2, #-256]!
-# CHECK-NEXT: 3 5 0.50 * ldpsw x20, x30, [sp, #252]!
-# CHECK-NEXT: 3 4 0.67 * ldp x21, x29, [x2, #504]!
-# CHECK-NEXT: 3 4 0.67 * ldp x22, x23, [x3, #-512]!
-# CHECK-NEXT: 3 4 0.67 * ldp x24, x25, [x4, #8]!
+# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [sp, #4]!
+# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [x2, #-256]!
+# CHECK-NEXT: 2 5 1.00 * ldpsw x20, x30, [sp, #252]!
+# CHECK-NEXT: 2 4 1.00 * ldp x21, x29, [x2, #504]!
+# CHECK-NEXT: 2 4 1.00 * ldp x22, x23, [x3, #-512]!
+# CHECK-NEXT: 2 4 1.00 * ldp x24, x25, [x4, #8]!
# CHECK-NEXT: 2 6 0.33 * ldp s29, s28, [sp, #252]!
# CHECK-NEXT: 3 2 0.50 * stp s27, s26, [sp, #-256]!
# CHECK-NEXT: 2 6 0.33 * ldp s1, s2, [x3, #44]!
# CHECK-NEXT: 3 2 0.50 * stp d3, d5, [x9, #504]!
# CHECK-NEXT: 3 2 0.50 * stp d7, d11, [x10, #-512]!
# CHECK-NEXT: 2 6 0.33 * ldp d2, d3, [x30, #-8]!
-# CHECK-NEXT: 4 2 1.00 * stp q3, q5, [sp, #0]!
-# CHECK-NEXT: 4 2 1.00 * stp q17, q19, [sp, #1008]!
-# CHECK-NEXT: 3 6 0.67 * ldp q23, q29, [x1, #-1024]!
+# CHECK-NEXT: 3 2 1.00 * stp q3, q5, [sp, #0]!
+# CHECK-NEXT: 3 2 1.00 * stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: 2 6 0.67 * ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: 1 4 0.33 * ldnp w3, w5, [sp]
# CHECK-NEXT: 2 1 0.50 * stnp wzr, w9, [sp, #252]
# CHECK-NEXT: 1 4 0.33 * ldnp w2, wzr, [sp, #-256]
# CHECK-NEXT: 1 4 0.33 * ldnp w9, w10, [sp, #4]
-# CHECK-NEXT: 2 4 0.67 * ldnp x21, x29, [x2, #504]
-# CHECK-NEXT: 2 4 0.67 * ldnp x22, x23, [x3, #-512]
-# CHECK-NEXT: 2 4 0.67 * ldnp x24, x25, [x4, #8]
+# CHECK-NEXT: 1 4 1.00 * ldnp x21, x29, [x2, #504]
+# CHECK-NEXT: 1 4 1.00 * ldnp x22, x23, [x3, #-512]
+# CHECK-NEXT: 1 4 1.00 * ldnp x24, x25, [x4, #8]
# CHECK-NEXT: 1 6 0.33 * ldnp s29, s28, [sp, #252]
# CHECK-NEXT: 2 2 0.50 * stnp s27, s26, [sp, #-256]
# CHECK-NEXT: 1 6 0.33 * ldnp s1, s2, [x3, #44]
@@ -2481,7 +2481,7 @@ drps
# CHECK-NEXT: 1 6 0.33 * ldnp d2, d3, [x30, #-8]
# CHECK-NEXT: 2 2 0.50 * stnp q3, q5, [sp]
# CHECK-NEXT: 2 2 0.50 * stnp q17, q19, [sp, #1008]
-# CHECK-NEXT: 2 6 0.67 * ldnp q23, q29, [x1, #-1024]
+# CHECK-NEXT: 1 6 0.67 * ldnp q23, q29, [x1, #-1024]
# CHECK-NEXT: 1 1 0.25 mov w3, #983055
# CHECK-NEXT: 1 1 0.25 mov x10, #-6148914691236517206
# CHECK-NEXT: 1 1 0.25 and w12, w23, w21
@@ -2502,12 +2502,12 @@ drps
# CHECK-NEXT: 1 1 0.25 orr x8, x9, x10, lsl #12
# CHECK-NEXT: 1 1 0.25 orn x3, x5, x7, asr #2
# CHECK-NEXT: 1 1 0.25 orn w2, w5, w29
-# CHECK-NEXT: 1 2 0.50 ands w7, wzr, w9, lsl #1
-# CHECK-NEXT: 1 2 0.50 ands x3, x5, x20, ror #63
-# CHECK-NEXT: 1 2 0.50 bics w3, w5, w7
-# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
-# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
-# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
+# CHECK-NEXT: 2 2 0.50 ands w7, wzr, w9, lsl #1
+# CHECK-NEXT: 2 1 0.33 ands x3, x5, x20, ror #63
+# CHECK-NEXT: 2 1 0.33 bics w3, w5, w7
+# CHECK-NEXT: 2 2 0.50 bics x3, xzr, x3, lsl #1
+# CHECK-NEXT: 2 2 0.50 tst w3, w7, lsl #31
+# CHECK-NEXT: 2 2 0.50 tst x2, x20, asr #2
# CHECK-NEXT: 1 1 0.25 mov x3, x6
# CHECK-NEXT: 1 1 0.25 mov x3, xzr
# CHECK-NEXT: 1 1 0.25 mov wzr, w2
@@ -2562,7 +2562,7 @@ drps
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11]
-# CHECK-NEXT: 11.00 11.00 33.00 33.00 45.33 45.33 45.33 96.33 162.33 162.33 306.50 205.50 139.00 139.00 167.50 44.50 51.50 9.50
+# CHECK-NEXT: 11.00 11.00 33.00 33.00 45.33 45.33 45.33 106.33 172.33 172.33 338.75 193.75 125.75 125.75 183.50 46.50 29.50 9.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions:
@@ -2581,16 +2581,16 @@ drps
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - sub w4, w20, #546, lsl #12
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - sub sp, sp, #288
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - sub wsp, w19, #16
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - adds w13, w23, #291, lsl #12
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - adds w13, w23, #291, lsl #12
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmn w2, #4095
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - adds w20, wsp, #0
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmn x3, #1, lsl #12
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmp sp, #20, lsl #12
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - cmn x3, #1, lsl #12
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - cmp sp, #20, lsl #12
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmp x30, #4095
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - subs x4, sp, #3822
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmn w3, #291, lsl #12
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - cmn w3, #291, lsl #12
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmn wsp, #1365
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - cmn sp, #1092, lsl #12
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - cmn sp, #1092, lsl #12
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - mov sp, x30
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - mov wsp, w20
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - mov x11, sp
@@ -2973,10 +2973,10 @@ drps
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - clz x26, x4
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - cls w3, w5
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - cls x20, x5
-# CHECK-NEXT: - - - - - - - - - - 5.00 - - - - - - - udiv w0, w7, w10
-# CHECK-NEXT: - - - - - - - - - - 5.00 - - - - - - - udiv x9, x22, x4
-# CHECK-NEXT: - - - - - - - - - - 5.00 - - - - - - - sdiv w12, w21, w0
-# CHECK-NEXT: - - - - - - - - - - 5.00 - - - - - - - sdiv x13, x2, x1
+# CHECK-NEXT: - - - - - - - - - - 12.00 - - - - - - - udiv w0, w7, w10
+# CHECK-NEXT: - - - - - - - - - - 20.00 - - - - - - - udiv x9, x22, x4
+# CHECK-NEXT: - - - - - - - - - - 12.00 - - - - - - - sdiv w12, w21, w0
+# CHECK-NEXT: - - - - - - - - - - 20.00 - - - - - - - sdiv x13, x2, x1
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - lsl w11, w12, w13
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - lsl x14, x15, x16
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - lsr w17, w18, w19
@@ -3044,10 +3044,10 @@ drps
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - umull x11, w13, w17
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - smnegl x11, w13, w17
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - umnegl x11, w13, w17
-# CHECK-NEXT: - - - - - - - - - - 0.75 0.75 0.25 0.25 - - - - extr w3, w5, w7, #0
-# CHECK-NEXT: - - - - - - - - - - 0.75 0.75 0.25 0.25 - - - - extr w11, w13, w17, #31
-# CHECK-NEXT: - - - - - - - - - - 0.75 0.75 0.25 0.25 - - - - extr x3, x5, x7, #15
-# CHECK-NEXT: - - - - - - - - - - 0.75 0.75 0.25 0.25 - - - - extr x11, x13, x17, #63
+# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - extr w3, w5, w7, #0
+# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - extr w11, w13, w17, #31
+# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - extr x3, x5, x7, #15
+# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - extr x11, x13, x17, #63
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - ror x19, x23, #24
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - ror x29, xzr, #63
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - ror w9, w13, #31
@@ -3080,7 +3080,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov s0, s1
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs s2, s3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg s4, s5
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fsqrt s6, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fsqrt s6, s7
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt d8, s9
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt h10, s11
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintn s12, s13
@@ -3093,7 +3093,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov d0, d1
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs d2, d3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg d4, d5
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fsqrt d6, d7
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fsqrt d6, d7
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt s8, d9
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt h10, d11
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintn d12, d13
@@ -3106,7 +3106,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt s26, h27
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt d28, h29
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul s20, s19, s17
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fdiv s1, s2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 - 1.50 - fdiv s1, s2, s3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd s4, s5, s6
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub s7, s8, s9
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax s10, s11, s12
@@ -3131,42 +3131,42 @@ drps
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmadd d3, d13, d0, d23
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsub s3, s5, s6, s31
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsub d3, d13, d0, d23
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs w3, h5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs wzr, h20, #13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs w19, h0, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x3, h5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x12, h30, #45
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x19, h0, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs w3, s5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs wzr, s20, #13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs w19, s0, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x3, s5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x12, s30, #45
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x19, s0, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs w3, d5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs wzr, d20, #13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs w19, d0, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x3, d5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x12, d30, #45
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs x19, d0, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu w3, h5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu wzr, h20, #13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu w19, h0, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x3, h5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x12, h30, #45
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x19, h0, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu w3, s5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu wzr, s20, #13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu w19, s0, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x3, s5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x12, s30, #45
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x19, s0, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu w3, d5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu wzr, d20, #13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu w19, d0, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x3, d5, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x12, d30, #45
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu x19, d0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs w3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs wzr, h20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs w19, h0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x12, h30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x19, h0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs w3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs wzr, s20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs w19, s0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x12, s30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x19, s0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs w3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs wzr, d20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs w19, d0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x12, d30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs x19, d0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu w3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu wzr, h20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu w19, h0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x12, h30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x19, h0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu w3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu wzr, s20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu w19, s0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x12, s30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x19, s0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu w3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu wzr, d20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu w19, d0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x12, d30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu x19, d0, #64
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - scvtf h23, w19, #1
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - scvtf h31, wzr, #20
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - scvtf h14, w0, #32
@@ -3371,9 +3371,9 @@ drps
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str w19, [sp], #255
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str w20, [x30], #1
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str w21, [x12], #-256
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str xzr, [x9], #255
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str x2, [x3], #1
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str x19, [x12], #-256
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - str xzr, [x9], #255
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - str x2, [x3], #1
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - str x19, [x12], #-256
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrb w9, [x2], #255
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrb w10, [x3], #1
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrb w10, [x3], #-256
@@ -3401,18 +3401,18 @@ drps
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh wzr, [x9], #255
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh w2, [x3], #1
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh w19, [x12], #-256
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str b0, [x0], #255
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str b3, [x3], #1
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str b5, [sp], #-256
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str h10, [x10], #255
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str h13, [x23], #1
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str h15, [sp], #-256
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str s20, [x20], #255
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str s23, [x23], #1
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str s25, [x0], #-256
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str d20, [x20], #255
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str d23, [x23], #1
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str d25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str b0, [x0], #255
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str b3, [x3], #1
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str b5, [sp], #-256
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str h10, [x10], #255
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str h13, [x23], #1
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str h15, [sp], #-256
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str s20, [x20], #255
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str s23, [x23], #1
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str s25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d20, [x20], #255
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d23, [x23], #1
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d25, [x0], #-256
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr b0, [x0], #255
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr b3, [x3], #1
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr b5, [sp], #-256
@@ -3428,9 +3428,9 @@ drps
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q20, [x1], #255
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q23, [x9], #1
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q25, [x20], #-256
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q10, [x1], #255
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q22, [sp], #1
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q21, [x20], #-256
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q10, [x1], #255
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q22, [sp], #1
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q21, [x20], #-256
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr x3, [x4, #0]!
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - strb w9, [x2, #255]!
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - strb w10, [x3, #1]!
@@ -3441,9 +3441,9 @@ drps
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str w19, [sp, #255]!
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str w20, [x30, #1]!
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str w21, [x12, #-256]!
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str xzr, [x9, #255]!
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str x2, [x3, #1]!
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - str x19, [x12, #-256]!
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - str xzr, [x9, #255]!
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - str x2, [x3, #1]!
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - str x19, [x12, #-256]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrb w9, [x2, #255]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrb w10, [x3, #1]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrb w10, [x3, #-256]!
@@ -3471,18 +3471,18 @@ drps
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh wzr, [x9, #255]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh w2, [x3, #1]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh w19, [x12, #-256]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str b0, [x0, #255]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str b3, [x3, #1]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str b5, [sp, #-256]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str h10, [x10, #255]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str h13, [x23, #1]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str h15, [sp, #-256]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str s20, [x20, #255]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str s23, [x23, #1]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str s25, [x0, #-256]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str d20, [x20, #255]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str d23, [x23, #1]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str d25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str b0, [x0, #255]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str b3, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str b5, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str h10, [x10, #255]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str h13, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str h15, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str s20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str s23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str s25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d25, [x0, #-256]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr b0, [x0, #255]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr b3, [x3, #1]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr b5, [sp, #-256]!
@@ -3498,9 +3498,9 @@ drps
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q20, [x1, #255]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q23, [x9, #1]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q25, [x20, #-256]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q10, [x1, #255]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q22, [sp, #1]!
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q21, [x20, #-256]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q10, [x1, #255]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q22, [sp, #1]!
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q21, [x20, #-256]!
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - sttrb w9, [sp]
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - sttrh wzr, [x12, #255]
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - sttr w16, [x0, #-256]
@@ -3552,16 +3552,16 @@ drps
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrsb x18, [x22, w10, sxtw]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrsh w3, [sp, x5]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrsh w9, [x27, x6]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrh w10, [x30, x7, lsl #1]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrh w10, [x30, x7, lsl #1]
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - strh w11, [x29, x3, sxtx]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrh w12, [x28, xzr, sxtx]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrsh x13, [x27, x5, sxtx #1]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh x13, [x27, x5, sxtx #1]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrh w14, [x26, w6, uxtw]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrh w15, [x25, w7, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrsh w16, [x24, w8, uxtw #1]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldrsh w16, [x24, w8, uxtw #1]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrh w17, [x23, w9, sxtw]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldrh w18, [x22, w10, sxtw]
-# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - strh w19, [x21, wzr, sxtw #1]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr w3, [sp, x5]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr s9, [x27, x6]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr w10, [x30, x7, lsl #2]
@@ -3586,28 +3586,28 @@ drps
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr x17, [x23, w9, sxtw]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr x18, [x22, w10, sxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str d19, [x21, wzr, sxtw #3]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q3, [sp, x5]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q9, [x27, x6]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr q3, [sp, x5]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr q9, [x27, x6]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q10, [x30, x7, lsl #4]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q11, [x29, x3, sxtx]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q12, [x28, xzr, sxtx]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q13, [x27, x5, sxtx #4]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q14, [x26, w6, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr q14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr q15, [x25, w7, uxtw]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q16, [x24, w8, uxtw #4]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q17, [x23, w9, sxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - str q18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldr q17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - str q18, [x22, w10, sxtw]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldr q19, [x21, wzr, sxtw #4]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldp w3, w5, [sp]
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - stp wzr, w9, [sp, #252]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldp w2, wzr, [sp, #-256]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldp w9, w10, [sp, #4]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [sp, #4]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [x2, #-256]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldpsw x20, x30, [sp, #252]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - - - - - ldp x21, x29, [x2, #504]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - - - - - ldp x22, x23, [x3, #-512]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - - - - - ldp x24, x25, [x4, #8]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [sp, #4]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [x2, #-256]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x20, x30, [sp, #252]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - - - - - ldp x21, x29, [x2, #504]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - - - - - ldp x22, x23, [x3, #-512]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - - - - - ldp x24, x25, [x4, #8]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldp s29, s28, [sp, #252]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - stp s27, s26, [sp, #-256]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldp s1, s2, [x3, #44]
@@ -3621,47 +3621,47 @@ drps
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - stp wzr, w9, [sp], #252
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp w2, wzr, [sp], #-256
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp w9, w10, [sp], #4
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - ldpsw x9, x10, [sp], #4
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - ldpsw x9, x10, [x2], #-256
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - ldpsw x20, x30, [sp], #252
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp x21, x29, [x2], #504
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp x22, x23, [x3], #-512
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp x24, x25, [x4], #8
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [sp], #4
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [x2], #-256
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x20, x30, [sp], #252
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldp x21, x29, [x2], #504
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldp x22, x23, [x3], #-512
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldp x24, x25, [x4], #8
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp s29, s28, [sp], #252
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - stp s27, s26, [sp], #-256
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp s1, s2, [x3], #44
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - stp d3, d5, [x9], #504
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - stp d7, d11, [x10], #-512
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp d2, d3, [x30], #-8
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 0.50 0.50 - - stp q3, q5, [sp], #0
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 0.50 0.50 - - stp q17, q19, [sp], #1008
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 - - stp q3, q5, [sp], #0
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 - - stp q17, q19, [sp], #1008
# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp q23, q29, [x1], #-1024
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp w3, w5, [sp, #0]!
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 0.25 0.25 0.25 0.25 - - - - stp wzr, w9, [sp, #252]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp w2, wzr, [sp, #-256]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp w9, w10, [sp, #4]!
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - ldpsw x9, x10, [sp, #4]!
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - ldpsw x9, x10, [x2, #-256]!
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - ldpsw x20, x30, [sp, #252]!
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp x21, x29, [x2, #504]!
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp x22, x23, [x3, #-512]!
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp x24, x25, [x4, #8]!
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [sp, #4]!
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x9, x10, [x2, #-256]!
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldpsw x20, x30, [sp, #252]!
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldp x21, x29, [x2, #504]!
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldp x22, x23, [x3, #-512]!
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 - - - - ldp x24, x25, [x4, #8]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp s29, s28, [sp, #252]!
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - stp s27, s26, [sp, #-256]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp s1, s2, [x3, #44]!
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - stp d3, d5, [x9, #504]!
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.25 0.25 0.25 0.25 0.50 0.50 - - stp d7, d11, [x10, #-512]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - ldp d2, d3, [x30, #-8]!
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 0.50 0.50 - - stp q3, q5, [sp, #0]!
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 0.50 0.50 - - stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 - - stp q3, q5, [sp, #0]!
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 - - stp q17, q19, [sp, #1008]!
# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ldp q23, q29, [x1, #-1024]!
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldnp w3, w5, [sp]
# CHECK-NEXT: - - 0.50 0.50 - - - - 0.50 0.50 - - - - - - - - stnp wzr, w9, [sp, #252]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldnp w2, wzr, [sp, #-256]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldnp w9, w10, [sp, #4]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - - - - - ldnp x21, x29, [x2, #504]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - - - - - ldnp x22, x23, [x3, #-512]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - - - - - ldnp x24, x25, [x4, #8]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - - - - - ldnp x21, x29, [x2, #504]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - - - - - ldnp x22, x23, [x3, #-512]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - - - - - ldnp x24, x25, [x4, #8]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldnp s29, s28, [sp, #252]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - stnp s27, s26, [sp, #-256]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ldnp s1, s2, [x3, #44]
@@ -3692,8 +3692,8 @@ drps
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - orn x3, x5, x7, asr #2
# CHECK-NEXT: - - - - - - - - - - 0.25 0.25 0.25 0.25 - - - - orn w2, w5, w29
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - ands w7, wzr, w9, lsl #1
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - ands x3, x5, x20, ror #63
-# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - bics w3, w5, w7
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - ands x3, x5, x20, ror #63
+# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.25 0.25 0.25 0.25 - - - - bics w3, w5, w7
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - bics x3, xzr, x3, lsl #1
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - tst w3, w7, lsl #31
# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - 0.50 0.50 - - - - - - tst x2, x20, asr #2
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s
index 4de37f960005202..4d099f585143709 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s
@@ -893,10 +893,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
-# CHECK-NEXT: Total uOps: 500
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 2.0
@@ -931,10 +931,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
-# CHECK-NEXT: Total uOps: 500
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 2.0
@@ -969,10 +969,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
-# CHECK-NEXT: Total uOps: 500
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: uOps Per Cycle: 0.29
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 5.0
@@ -1007,10 +1007,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
-# CHECK-NEXT: Total uOps: 500
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1045,10 +1045,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1703
-# CHECK-NEXT: Total uOps: 800
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 8.0
@@ -1063,7 +1063,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeER. .. mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D========================eeeeeER .. mla z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] .D============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] D=============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1075,18 +1075,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 14.5 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 16.5 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 21.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 15.4 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 21.5 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
# CHECK: [22] Code Region - Z mad.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1703
-# CHECK-NEXT: Total uOps: 800
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 8.0
@@ -1101,7 +1101,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeER. .. mad z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D========================eeeeeER .. mad z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] .D============================eeeeeER mad z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] D=============================eeeeeER mad z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1113,18 +1113,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 14.5 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 16.5 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 21.0 0.0 0.0 mad z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 15.4 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 21.5 0.0 0.0 mad z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
# CHECK: [23] Code Region - Z msb.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1703
-# CHECK-NEXT: Total uOps: 800
+# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 8.0
@@ -1139,7 +1139,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeER. .. msb z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D========================eeeeeER .. msb z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] .D============================eeeeeER msb z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] D=============================eeeeeER msb z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1151,8 +1151,8 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 14.5 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 16.5 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 21.0 0.0 0.0 msb z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 15.4 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 21.5 0.0 0.0 msb z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
# CHECK: [24] Code Region - Z fcmla ZPmZZ
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
index 20e76ef27c4702e..bc7bc2cdb14b1d7 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
@@ -1248,17 +1248,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 addp v0.8b, v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.50 addv s0, v0.4s
# CHECK-NEXT: 1 2 0.50 addv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 addv h0, v0.8h
-# CHECK-NEXT: 2 4 0.50 addv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 addv b0, v0.16b
+# CHECK-NEXT: 1 4 0.50 addv h0, v0.8h
+# CHECK-NEXT: 1 4 0.50 addv b0, v0.8b
+# CHECK-NEXT: 1 4 1.00 addv b0, v0.16b
# CHECK-NEXT: 1 2 0.25 aesd v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aese v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aesimc v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 aesmc v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 and v0.8b, v0.8b, v0.8b
# CHECK-NEXT: 1 3 0.50 bfcvt h0, s0
-# CHECK-NEXT: 1 4 0.50 bfcvtn v0.4h, v0.4s
-# CHECK-NEXT: 1 4 0.50 bfcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 4 1.00 bfcvtn v0.4h, v0.4s
+# CHECK-NEXT: 1 4 1.00 bfcvtn2 v0.8h, v0.4s
# CHECK-NEXT: 1 4 0.25 bfdot v0.2s, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.25 bfdot v0.4s, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.25 bfmlalb v0.4s, v0.8h, v0.8h
@@ -1374,113 +1374,113 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 fcmlt s10, s11, #0.0
# CHECK-NEXT: 1 2 0.25 fcmlt v0.4s, v0.4s, #0.0
# CHECK-NEXT: 1 3 0.50 fcvtas d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtas s12, s13
-# CHECK-NEXT: 4 6 1.00 fcvtas h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtas s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtas h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtas v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtas v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtas v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtas v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtas v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtas v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtas v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtas v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtau d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtau s12, s13
-# CHECK-NEXT: 4 6 1.00 fcvtau h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtau s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtau h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtau v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtau v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtau v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtau v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtau v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtau v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtau v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtau v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtl v0.2d, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtl v0.4s, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtl v0.4s, v0.4h
# CHECK-NEXT: 1 3 0.50 fcvtl2 v0.2d, v0.4s
-# CHECK-NEXT: 2 4 1.00 fcvtl2 v0.4s, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtl2 v0.4s, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtms d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtms s22, s13
-# CHECK-NEXT: 4 6 1.00 fcvtms h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtms s22, s13
+# CHECK-NEXT: 1 3 0.50 fcvtms h22, h13
# CHECK-NEXT: 1 3 0.50 fcvtms v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtms v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtms v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtms v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtms v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtms v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtms v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtms v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtmu d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtmu s12, s13
-# CHECK-NEXT: 4 6 1.00 fcvtmu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtmu s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtmu h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtmu v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtmu v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtmu v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtmu v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtmu v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtn v0.2s, v0.2d
-# CHECK-NEXT: 2 4 1.00 fcvtn v0.4h, v0.4s
+# CHECK-NEXT: 1 4 1.00 fcvtn v0.4h, v0.4s
# CHECK-NEXT: 1 3 0.50 fcvtn2 v0.4s, v0.2d
-# CHECK-NEXT: 2 4 1.00 fcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 4 1.00 fcvtn2 v0.8h, v0.4s
# CHECK-NEXT: 1 3 0.50 fcvtns d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtns s22, s13
-# CHECK-NEXT: 4 6 1.00 fcvtns h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtns s22, s13
+# CHECK-NEXT: 1 3 0.50 fcvtns h22, h13
# CHECK-NEXT: 1 3 0.50 fcvtns v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtns v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtns v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtns v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtns v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtns v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtns v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtns v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtnu d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtnu s12, s13
-# CHECK-NEXT: 4 6 1.00 fcvtnu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtnu s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtnu h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtnu v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtnu v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtnu v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtnu v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtnu v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtps d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtps s22, s13
-# CHECK-NEXT: 4 6 1.00 fcvtps h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtps s22, s13
+# CHECK-NEXT: 1 3 0.50 fcvtps h22, h13
# CHECK-NEXT: 1 3 0.50 fcvtps v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtps v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtps v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtps v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtps v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtps v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtps v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtps v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtpu d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtpu s12, s13
-# CHECK-NEXT: 4 6 1.00 fcvtpu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtpu s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtpu h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 fcvtpu v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtpu v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtpu v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtpu v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 fcvtpu v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtxn s22, d13
# CHECK-NEXT: 1 3 0.50 fcvtxn v0.2s, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtxn2 v0.4s, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtzs s12, s13
-# CHECK-NEXT: 2 4 1.00 fcvtzs s21, s12, #1
-# CHECK-NEXT: 4 6 1.00 fcvtzs h21, h14
-# CHECK-NEXT: 4 6 1.00 fcvtzs h21, h12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzs s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtzs s21, s12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzs h21, h14
+# CHECK-NEXT: 1 3 0.50 fcvtzs h21, h12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s, #3
-# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s
-# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s, #3
-# CHECK-NEXT: 4 6 1.00 fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 fcvtzs v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtzs v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 6 2.00 fcvtzs v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d14
-# CHECK-NEXT: 2 4 1.00 fcvtzu s12, s13
-# CHECK-NEXT: 2 4 1.00 fcvtzu s21, s12, #1
-# CHECK-NEXT: 4 6 1.00 fcvtzu h12, h13
-# CHECK-NEXT: 4 6 1.00 fcvtzu h21, h12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzu s12, s13
+# CHECK-NEXT: 1 3 0.50 fcvtzu s21, s12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtzu h21, h12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s, #3
-# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s
-# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s, #3
-# CHECK-NEXT: 4 6 1.00 fcvtzu v0.8h, v0.8h
-# CHECK-NEXT: 1 15 3.50 fdiv v0.2d, v0.2d, v0.2d
-# CHECK-NEXT: 1 10 3.50 fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 fcvtzu v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 fcvtzu v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 6 2.00 fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: 1 15 7.00 fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 10 2.50 fdiv v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 7 3.50 fdiv v0.4h, v0.4h, v0.4h
-# CHECK-NEXT: 1 10 3.50 fdiv v0.4s, v0.4s, v0.4s
-# CHECK-NEXT: 1 13 2.50 fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 10 4.50 fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 13 6.50 fdiv v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 fmax v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 fmax v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 fmax v0.4s, v0.4s, v0.4s
@@ -1493,9 +1493,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 fmaxp v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 fmaxp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 fmaxp v0.4s, v0.4s, v0.4s
-# CHECK-NEXT: 2 4 0.50 fmaxv h0, v0.4h
-# CHECK-NEXT: 3 6 0.75 fmaxv h0, v0.8h
-# CHECK-NEXT: 2 4 0.50 fmaxv s0, v0.4s
+# CHECK-NEXT: 1 4 0.50 fmaxv h0, v0.4h
+# CHECK-NEXT: 1 6 0.75 fmaxv h0, v0.8h
+# CHECK-NEXT: 1 4 0.50 fmaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 fmin v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 fmin v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 fmin v0.4s, v0.4s, v0.4s
@@ -1537,8 +1537,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 fmul s0, s1, v0.s[3]
# CHECK-NEXT: 1 3 0.25 fmul v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.25 fmulx d0, d4, v0.d[1]
-# CHECK-NEXT: 1 2 0.25 fmulx d23, d11, d1
-# CHECK-NEXT: 1 2 0.25 fmulx s20, s22, s15
+# CHECK-NEXT: 1 3 0.25 fmulx d23, d11, d1
+# CHECK-NEXT: 1 3 0.25 fmulx s20, s22, s15
# CHECK-NEXT: 1 3 0.25 fmulx s3, s5, v0.s[3]
# CHECK-NEXT: 1 3 0.25 fmulx v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.25 fmulx v0.2s, v0.2s, v0.2s
@@ -1550,11 +1550,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 fneg v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frecpe d13, d13
# CHECK-NEXT: 1 3 0.50 frecpe s19, s14
-# CHECK-NEXT: 1 4 0.50 frecpe v0.2d, v0.2d
+# CHECK-NEXT: 1 4 1.00 frecpe v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frecpe v0.2s, v0.2s
-# CHECK-NEXT: 1 4 0.50 frecpe v0.4h, v0.4h
-# CHECK-NEXT: 1 4 0.50 frecpe v0.4s, v0.4s
-# CHECK-NEXT: 2 6 1.00 frecpe v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frecpe v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frecpe v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frecpe v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.25 frecps v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 4 0.25 frecps d22, d30, d21
# CHECK-NEXT: 1 4 0.25 frecps s21, s16, s13
@@ -1562,63 +1562,63 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frecpx s18, s10
# CHECK-NEXT: 1 3 0.50 frinta v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frinta v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frinta v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frinta v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frinta v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frinta v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frinta v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frinta v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frinti v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frinti v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frinti v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frinti v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frinti v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frinti v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frinti v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frinti v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frintm v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frintm v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frintm v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frintm v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frintm v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frintm v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frintm v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frintm v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frintn v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frintn v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frintn v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frintn v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frintn v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frintn v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frintn v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frintn v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frintp v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frintp v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frintp v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frintp v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frintp v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frintp v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frintp v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frintp v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frintx v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frintx v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frintx v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frintx v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frintx v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frintx v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frintx v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frintx v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frintz v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frintz v0.2s, v0.2s
-# CHECK-NEXT: 2 4 1.00 frintz v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 frintz v0.4s, v0.4s
-# CHECK-NEXT: 4 6 1.00 frintz v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frintz v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frintz v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frintz v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 frsqrte d21, d12
# CHECK-NEXT: 1 3 0.50 frsqrte s22, s13
-# CHECK-NEXT: 1 4 0.50 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: 1 4 1.00 frsqrte v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 frsqrte v0.2s, v0.2s
-# CHECK-NEXT: 1 4 0.50 frsqrte v0.4h, v0.4h
-# CHECK-NEXT: 1 4 0.50 frsqrte v0.4s, v0.4s
-# CHECK-NEXT: 2 6 1.00 frsqrte v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 frsqrte v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 frsqrte v0.4s, v0.4s
+# CHECK-NEXT: 1 6 2.00 frsqrte v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.25 frsqrts d8, d22, d18
# CHECK-NEXT: 1 4 0.25 frsqrts s21, s5, s12
# CHECK-NEXT: 1 4 0.25 frsqrts v0.2d, v0.2d, v0.2d
-# CHECK-NEXT: 1 16 3.50 fsqrt v0.2d, v0.2d
-# CHECK-NEXT: 1 10 3.50 fsqrt v0.2s, v0.2s
+# CHECK-NEXT: 1 16 7.50 fsqrt v0.2d, v0.2d
+# CHECK-NEXT: 1 10 2.50 fsqrt v0.2s, v0.2s
# CHECK-NEXT: 1 7 3.50 fsqrt v0.4h, v0.4h
-# CHECK-NEXT: 1 10 3.50 fsqrt v0.4s, v0.4s
-# CHECK-NEXT: 1 13 5.50 fsqrt v0.8h, v0.8h
+# CHECK-NEXT: 1 10 4.50 fsqrt v0.4s, v0.4s
+# CHECK-NEXT: 1 13 6.50 fsqrt v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 fsub v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 6 0.33 * ld1 { v0.16b }, [x0]
-# CHECK-NEXT: 3 6 0.67 * ld1 { v0.8h, v1.8h }, [sp], #32
-# CHECK-NEXT: 4 6 1.00 * ld1 { v0.4s, v1.4s, v2.4s }, [x0], #48
-# CHECK-NEXT: 4 7 1.33 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 2 6 0.67 * ld1 { v0.8h, v1.8h }, [sp], #32
+# CHECK-NEXT: 2 6 1.00 * ld1 { v0.4s, v1.4s, v2.4s }, [x0], #48
+# CHECK-NEXT: 1 7 1.33 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
# CHECK-NEXT: 2 6 0.33 * ld1 { v0.1d }, [x15], x2
-# CHECK-NEXT: 2 6 0.67 * ld1 { v0.2s, v1.2s }, [x15]
-# CHECK-NEXT: 3 6 1.00 * ld1 { v0.4h, v1.4h, v2.4h }, [sp]
-# CHECK-NEXT: 3 6 0.67 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 1 6 0.67 * ld1 { v0.2s, v1.2s }, [x15]
+# CHECK-NEXT: 1 6 1.00 * ld1 { v0.4h, v1.4h, v2.4h }, [sp]
+# CHECK-NEXT: 2 6 0.67 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
# CHECK-NEXT: 2 8 0.33 * ld1 { v0.b }[7], [x0]
# CHECK-NEXT: 3 8 0.33 * ld1 { v0.h }[3], [x0], #2
# CHECK-NEXT: 2 8 0.33 * ld1 { v0.s }[1], [x15]
@@ -1627,30 +1627,30 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 3 8 0.33 * ld1r { v0.8h }, [x0], #2
# CHECK-NEXT: 2 8 0.33 * ld1r { v0.4s }, [x15]
# CHECK-NEXT: 3 8 0.33 * ld1r { v0.2d }, [x15], x16
-# CHECK-NEXT: 4 8 0.67 * ld2 { v0.16b, v1.16b }, [x0]
-# CHECK-NEXT: 4 8 0.50 * ld2 { v0.8b, v1.8b }, [x0], #16
-# CHECK-NEXT: 3 8 0.50 * ld2 { v0.h, v1.h }[7], [x15]
-# CHECK-NEXT: 4 8 0.50 * ld2 { v0.h, v1.h }[7], [x15], x8
-# CHECK-NEXT: 3 8 0.50 * ld2r { v0.8b, v1.8b }, [x0]
-# CHECK-NEXT: 4 8 0.50 * ld2r { v0.4h, v1.4h }, [x0], #4
-# CHECK-NEXT: 3 8 0.50 * ld2r { v0.2s, v1.2s }, [sp]
-# CHECK-NEXT: 4 8 0.50 * ld2r { v0.1d, v1.1d }, [sp], x8
-# CHECK-NEXT: 5 8 0.75 * ld3 { v0.4h, v1.4h, v2.4h }, [x15]
-# CHECK-NEXT: 7 8 1.00 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], #48
-# CHECK-NEXT: 5 8 0.75 * ld3 { v0.s, v1.s, v2.s }[3], [sp]
-# CHECK-NEXT: 6 8 0.75 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
-# CHECK-NEXT: 5 8 0.75 * ld3r { v0.8b, v1.8b, v2.8b }, [x15]
-# CHECK-NEXT: 6 8 0.75 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
-# CHECK-NEXT: 5 8 0.75 * ld3r { v0.2s, v1.2s, v2.2s }, [x0]
-# CHECK-NEXT: 6 8 0.75 * ld3r { v0.1d, v1.1d, v2.1d }, [x0], x0
-# CHECK-NEXT: 7 8 1.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
-# CHECK-NEXT: 9 9 1.33 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
-# CHECK-NEXT: 7 8 1.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
-# CHECK-NEXT: 8 8 1.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
-# CHECK-NEXT: 7 8 1.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
-# CHECK-NEXT: 7 8 1.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
-# CHECK-NEXT: 8 8 1.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
-# CHECK-NEXT: 8 8 1.00 * ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], x8
+# CHECK-NEXT: 2 8 0.67 * ld2 { v0.16b, v1.16b }, [x0]
+# CHECK-NEXT: 3 8 0.33 * ld2 { v0.8b, v1.8b }, [x0], #16
+# CHECK-NEXT: 2 8 0.67 * ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: 3 8 0.67 * ld2 { v0.h, v1.h }[7], [x15], x8
+# CHECK-NEXT: 2 8 0.67 * ld2r { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 8 0.67 * ld2r { v0.4h, v1.4h }, [x0], #4
+# CHECK-NEXT: 2 8 0.67 * ld2r { v0.2s, v1.2s }, [sp]
+# CHECK-NEXT: 3 8 0.67 * ld2r { v0.1d, v1.1d }, [sp], x8
+# CHECK-NEXT: 2 8 1.00 * ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 3 8 1.00 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], #48
+# CHECK-NEXT: 2 8 1.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: 3 8 1.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: 2 8 1.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x15]
+# CHECK-NEXT: 3 8 1.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: 2 8 1.00 * ld3r { v0.2s, v1.2s, v2.2s }, [x0]
+# CHECK-NEXT: 3 8 1.00 * ld3r { v0.1d, v1.1d, v2.1d }, [x0], x0
+# CHECK-NEXT: 2 8 1.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 3 9 2.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 2 8 1.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: 3 8 1.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: 2 8 1.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: 2 8 1.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# CHECK-NEXT: 3 8 1.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# CHECK-NEXT: 3 8 1.00 * ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], x8
# CHECK-NEXT: 1 4 0.50 mla v0.8b, v0.8b, v0.8b
# CHECK-NEXT: 1 4 0.50 mls v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 mov b0, v0.b[15]
@@ -1759,9 +1759,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 saddlp v0.8h, v0.16b
# CHECK-NEXT: 1 2 0.50 saddlv d0, v0.4s
# CHECK-NEXT: 1 2 0.50 saddlv s0, v0.4h
-# CHECK-NEXT: 2 4 0.50 saddlv s0, v0.8h
-# CHECK-NEXT: 2 4 0.50 saddlv h0, v0.8b
-# CHECK-NEXT: 2 4 1.00 saddlv h0, v0.16b
+# CHECK-NEXT: 1 4 0.50 saddlv s0, v0.8h
+# CHECK-NEXT: 1 4 0.50 saddlv h0, v0.8b
+# CHECK-NEXT: 1 4 1.00 saddlv h0, v0.16b
# CHECK-NEXT: 1 2 0.25 saddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: 1 2 0.25 saddw v0.4s, v0.4s, v0.4h
# CHECK-NEXT: 1 2 0.25 saddw v0.8h, v0.8h, v0.8b
@@ -1770,22 +1770,22 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 saddw2 v0.8h, v0.8h, v0.16b
# CHECK-NEXT: 1 3 0.50 scvtf d21, d12
# CHECK-NEXT: 1 3 0.50 scvtf d21, d12, #64
-# CHECK-NEXT: 2 4 1.00 scvtf s22, s13
-# CHECK-NEXT: 2 4 1.00 scvtf s22, s13, #32
+# CHECK-NEXT: 1 3 0.50 scvtf s22, s13
+# CHECK-NEXT: 1 3 0.50 scvtf s22, s13, #32
# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s, #3
-# CHECK-NEXT: 2 4 1.00 scvtf v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s
-# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s, #3
-# CHECK-NEXT: 4 6 1.00 scvtf v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 scvtf v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 scvtf v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 6 2.00 scvtf v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.8b
# CHECK-NEXT: 1 3 0.25 sdot v0.4s, v0.16b, v0.16b
# CHECK-NEXT: 1 3 0.25 sdot v0.4s, v0.16b, v0.4b[2]
# CHECK-NEXT: 1 2 0.25 shadd v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 1 2 0.25 shl d7, d10, #12
+# CHECK-NEXT: 1 2 0.50 shl d7, d10, #12
# CHECK-NEXT: 1 2 0.50 shl v0.16b, v0.16b, #3
# CHECK-NEXT: 1 2 0.50 shl v0.2d, v0.2d, #3
# CHECK-NEXT: 1 2 0.50 shl v0.4h, v0.4h, #3
@@ -1810,7 +1810,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 shrn2 v0.8h, v0.4s, #3
# CHECK-NEXT: 1 2 0.25 shsub v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 shsub v0.4h, v0.4h, v0.4h
-# CHECK-NEXT: 1 2 0.25 sli d10, d14, #12
+# CHECK-NEXT: 1 2 0.50 sli d10, d14, #12
# CHECK-NEXT: 1 2 0.50 sli v0.16b, v0.16b, #3
# CHECK-NEXT: 1 2 0.50 sli v0.2d, v0.2d, #3
# CHECK-NEXT: 1 2 0.50 sli v0.2s, v0.2s, #3
@@ -1824,10 +1824,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 smaxp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 smaxp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 smaxp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 2 4 0.50 smaxv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 smaxv b0, v0.16b
+# CHECK-NEXT: 1 4 0.50 smaxv b0, v0.8b
+# CHECK-NEXT: 1 4 1.00 smaxv b0, v0.16b
# CHECK-NEXT: 1 2 0.50 smaxv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 smaxv h0, v0.8h
+# CHECK-NEXT: 1 4 0.50 smaxv h0, v0.8h
# CHECK-NEXT: 1 2 0.50 smaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 smin v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 smin v0.4s, v0.4s, v0.4s
@@ -1835,10 +1835,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 sminp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 sminp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 sminp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 2 4 0.50 sminv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 sminv b0, v0.16b
+# CHECK-NEXT: 1 4 0.50 sminv b0, v0.8b
+# CHECK-NEXT: 1 4 1.00 sminv b0, v0.16b
# CHECK-NEXT: 1 2 0.50 sminv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 sminv h0, v0.8h
+# CHECK-NEXT: 1 4 0.50 sminv h0, v0.8h
# CHECK-NEXT: 1 2 0.50 sminv s0, v0.4s
# CHECK-NEXT: 1 4 0.50 smlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 smlal v0.4s, v0.4h, v0.4h
@@ -1896,9 +1896,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 sqdmulh v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 sqdmulh v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 3 0.50 sqdmull d1, s1, v0.s[1]
-# CHECK-NEXT: 1 2 0.25 sqdmull d15, s22, s12
+# CHECK-NEXT: 1 3 0.50 sqdmull d15, s22, s12
# CHECK-NEXT: 1 3 0.50 sqdmull s1, h1, v0.h[3]
-# CHECK-NEXT: 1 2 0.25 sqdmull s12, h22, h12
+# CHECK-NEXT: 1 3 0.50 sqdmull s12, h22, h12
# CHECK-NEXT: 1 3 0.50 sqdmull v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 sqdmull v0.4s, v0.4h, v0.4h
# CHECK-NEXT: 1 3 0.50 sqdmull2 v0.2d, v0.4s, v0.4s
@@ -1949,18 +1949,18 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 sqrshl v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 sqrshl v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.50 sqrshl v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 1 2 0.25 sqrshrn b10, h13, #2
-# CHECK-NEXT: 1 2 0.25 sqrshrn h15, s10, #6
-# CHECK-NEXT: 1 2 0.25 sqrshrn s15, d12, #9
+# CHECK-NEXT: 1 4 0.50 sqrshrn b10, h13, #2
+# CHECK-NEXT: 1 4 0.50 sqrshrn h15, s10, #6
+# CHECK-NEXT: 1 4 0.50 sqrshrn s15, d12, #9
# CHECK-NEXT: 1 4 0.50 sqrshrn v0.2s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 sqrshrn v0.4h, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 sqrshrn v0.8b, v0.8h, #3
# CHECK-NEXT: 1 4 0.50 sqrshrn2 v0.16b, v0.8h, #3
# CHECK-NEXT: 1 4 0.50 sqrshrn2 v0.4s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 sqrshrn2 v0.8h, v0.4s, #3
-# CHECK-NEXT: 1 2 0.25 sqrshrun b17, h10, #6
-# CHECK-NEXT: 1 2 0.25 sqrshrun h10, s13, #15
-# CHECK-NEXT: 1 2 0.25 sqrshrun s22, d16, #31
+# CHECK-NEXT: 1 4 0.50 sqrshrun b17, h10, #6
+# CHECK-NEXT: 1 4 0.50 sqrshrun h10, s13, #15
+# CHECK-NEXT: 1 4 0.50 sqrshrun s22, d16, #31
# CHECK-NEXT: 1 4 0.50 sqrshrun v0.2s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 sqrshrun v0.4h, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 sqrshrun v0.8b, v0.8h, #3
@@ -1994,24 +1994,24 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 sqshlu v0.4s, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 sqshlu v0.8b, v0.8b, #3
# CHECK-NEXT: 1 4 0.50 sqshlu v0.8h, v0.8h, #3
-# CHECK-NEXT: 1 2 0.25 sqshrn b10, h15, #5
-# CHECK-NEXT: 1 2 0.25 sqshrn h17, s10, #4
-# CHECK-NEXT: 1 2 0.25 sqshrn s18, d10, #31
+# CHECK-NEXT: 1 4 0.50 sqshrn b10, h15, #5
+# CHECK-NEXT: 1 4 0.50 sqshrn h17, s10, #4
+# CHECK-NEXT: 1 4 0.50 sqshrn s18, d10, #31
# CHECK-NEXT: 1 4 0.50 sqshrn v0.2s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 sqshrn v0.4h, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 sqshrn v0.8b, v0.8h, #3
# CHECK-NEXT: 1 4 0.50 sqshrn2 v0.16b, v0.8h, #3
# CHECK-NEXT: 1 4 0.50 sqshrn2 v0.4s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 sqshrn2 v0.8h, v0.4s, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun b15, h10, #7
-# CHECK-NEXT: 1 2 0.25 sqshrun h20, s14, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun s10, d15, #15
-# CHECK-NEXT: 1 2 0.25 sqshrun v0.2s, v0.2d, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun v0.4h, v0.4s, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun v0.8b, v0.8h, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun2 v0.16b, v0.8h, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun2 v0.4s, v0.2d, #3
-# CHECK-NEXT: 1 2 0.25 sqshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun b15, h10, #7
+# CHECK-NEXT: 1 4 0.50 sqshrun h20, s14, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun s10, d15, #15
+# CHECK-NEXT: 1 4 0.50 sqshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun2 v0.8h, v0.4s, #3
# CHECK-NEXT: 1 2 0.25 sqsub s20, s10, s7
# CHECK-NEXT: 1 2 0.25 sqsub v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 sqsub v0.4s, v0.4s, v0.4s
@@ -2037,7 +2037,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 srhadd v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 srhadd v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 srhadd v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 1 2 0.25 sri d10, d12, #14
+# CHECK-NEXT: 1 2 0.50 sri d10, d12, #14
# CHECK-NEXT: 1 2 0.50 sri v0.16b, v0.16b, #3
# CHECK-NEXT: 1 2 0.50 sri v0.2d, v0.2d, #3
# CHECK-NEXT: 1 2 0.50 sri v0.2s, v0.2s, #3
@@ -2049,7 +2049,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 srshl v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 srshl v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 4 0.50 srshl v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 1 2 0.25 srshr d19, d18, #7
+# CHECK-NEXT: 1 4 0.50 srshr d19, d18, #7
# CHECK-NEXT: 1 4 0.50 srshr v0.16b, v0.16b, #3
# CHECK-NEXT: 1 4 0.50 srshr v0.2d, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 srshr v0.2s, v0.2s, #3
@@ -2057,7 +2057,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 srshr v0.4s, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 srshr v0.8b, v0.8b, #3
# CHECK-NEXT: 1 4 0.50 srshr v0.8h, v0.8h, #3
-# CHECK-NEXT: 1 2 0.25 srsra d15, d11, #19
+# CHECK-NEXT: 1 4 0.50 srsra d15, d11, #19
# CHECK-NEXT: 1 4 0.50 srsra v0.16b, v0.16b, #3
# CHECK-NEXT: 1 4 0.50 srsra v0.2d, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 srsra v0.2s, v0.2s, #3
@@ -2072,7 +2072,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 sshl v0.8b, v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.50 sshll v0.2d, v0.2s, #3
# CHECK-NEXT: 1 2 0.50 sshll2 v0.4s, v0.8h, #3
-# CHECK-NEXT: 1 2 0.25 sshr d15, d16, #12
+# CHECK-NEXT: 1 2 0.50 sshr d15, d16, #12
# CHECK-NEXT: 1 2 0.50 sshr v0.16b, v0.16b, #3
# CHECK-NEXT: 1 2 0.50 sshr v0.2d, v0.2d, #3
# CHECK-NEXT: 1 2 0.50 sshr v0.2s, v0.2s, #3
@@ -2080,7 +2080,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 sshr v0.4s, v0.4s, #3
# CHECK-NEXT: 1 2 0.50 sshr v0.8b, v0.8b, #3
# CHECK-NEXT: 1 2 0.50 sshr v0.8h, v0.8h, #3
-# CHECK-NEXT: 1 2 0.25 ssra d18, d12, #21
+# CHECK-NEXT: 1 4 0.50 ssra d18, d12, #21
# CHECK-NEXT: 1 4 0.50 ssra v0.16b, v0.16b, #3
# CHECK-NEXT: 1 4 0.50 ssra v0.2d, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 ssra v0.2s, v0.2s, #3
@@ -2101,27 +2101,27 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 ssubw2 v0.4s, v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 ssubw2 v0.8h, v0.8h, v0.16b
# CHECK-NEXT: 2 2 0.50 * st1 { v0.16b }, [x0]
-# CHECK-NEXT: 5 2 1.00 * st1 { v0.4s, v1.4s }, [sp], #32
-# CHECK-NEXT: 7 2 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
-# CHECK-NEXT: 8 2 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 3 2 1.00 * st1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: 3 2 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
# CHECK-NEXT: 3 2 0.50 * st1 { v0.8h }, [x15], x2
-# CHECK-NEXT: 4 2 1.00 * st1 { v0.8h, v1.8h }, [x15]
-# CHECK-NEXT: 6 2 1.50 * st1 { v0.4s, v1.4s, v2.4s }, [sp]
-# CHECK-NEXT: 5 2 1.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 2 2 1.00 * st1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: 2 2 1.50 * st1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: 3 2 1.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
# CHECK-NEXT: 2 4 0.50 * st1 { v0.d }[1], [x0]
# CHECK-NEXT: 3 4 0.50 * st1 { v0.d }[1], [x0], #8
-# CHECK-NEXT: 5 4 1.00 * st2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: 3 4 1.00 * st2 { v0.16b, v1.16b }, [x0], x1
# CHECK-NEXT: 2 4 0.50 * st2 { v0.8b, v1.8b }, [x0]
# CHECK-NEXT: 2 4 0.50 * st2 { v0.s, v1.s }[3], [sp]
# CHECK-NEXT: 3 4 0.50 * st2 { v0.s, v1.s }[3], [sp], #8
-# CHECK-NEXT: 4 4 1.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15]
-# CHECK-NEXT: 7 5 1.50 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
-# CHECK-NEXT: 4 4 1.00 * st3 { v0.h, v1.h, v2.h }[7], [x15]
-# CHECK-NEXT: 5 4 1.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6
-# CHECK-NEXT: 6 6 1.50 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
-# CHECK-NEXT: 13 7 3.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
-# CHECK-NEXT: 6 6 1.00 * st4 { v0.b, v1.b, v2.b, v3.b }[15], [x0]
-# CHECK-NEXT: 5 4 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+# CHECK-NEXT: 2 4 1.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 3 5 1.50 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: 2 4 1.00 * st3 { v0.h, v1.h, v2.h }[7], [x15]
+# CHECK-NEXT: 3 4 1.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+# CHECK-NEXT: 2 6 3.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 3 7 6.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 2 6 1.00 * st4 { v0.b, v1.b, v2.b, v3.b }[15], [x0]
+# CHECK-NEXT: 3 4 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
# CHECK-NEXT: 1 2 0.25 sub d15, d5, d16
# CHECK-NEXT: 1 2 0.25 sub v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.25 sudot v0.2s, v0.8b, v0.4b[2]
@@ -2137,22 +2137,22 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 suqadd v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 suqadd v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.25 suqadd v0.8h, v0.8h
-# CHECK-NEXT: 2 2 1.00 tbl v0.16b, { v0.16b }, v0.16b
-# CHECK-NEXT: 2 2 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b
-# CHECK-NEXT: 2 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
-# CHECK-NEXT: 3 4 1.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
-# CHECK-NEXT: 2 2 1.00 tbl v0.8b, { v0.16b }, v0.8b
-# CHECK-NEXT: 2 2 1.00 tbl v0.8b, { v0.16b, v1.16b }, v0.8b
-# CHECK-NEXT: 2 4 1.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
-# CHECK-NEXT: 3 4 1.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
-# CHECK-NEXT: 2 2 1.00 tbx v0.16b, { v0.16b }, v0.16b
-# CHECK-NEXT: 2 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b
-# CHECK-NEXT: 3 6 1.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
-# CHECK-NEXT: 5 6 2.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
-# CHECK-NEXT: 2 2 1.00 tbx v0.8b, { v0.16b }, v0.8b
-# CHECK-NEXT: 2 4 1.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b
-# CHECK-NEXT: 3 6 1.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
-# CHECK-NEXT: 5 6 2.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.50 tbl v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.50 tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: 1 4 1.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.50 tbl v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.50 tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: 1 4 1.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: 1 4 1.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.50 tbx v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: 1 6 1.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: 1 6 2.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.50 tbx v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: 1 4 1.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: 1 6 1.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: 1 6 2.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
# CHECK-NEXT: 1 2 0.25 trn1 v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 trn1 v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 2 0.25 trn1 v0.2s, v0.2s, v0.2s
@@ -2201,9 +2201,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddlp v0.8h, v0.16b
# CHECK-NEXT: 1 2 0.50 uaddlv d0, v0.4s
# CHECK-NEXT: 1 2 0.50 uaddlv s0, v0.4h
-# CHECK-NEXT: 2 4 0.50 uaddlv s0, v0.8h
-# CHECK-NEXT: 2 4 0.50 uaddlv h0, v0.8b
-# CHECK-NEXT: 2 4 1.00 uaddlv h0, v0.16b
+# CHECK-NEXT: 1 4 0.50 uaddlv s0, v0.8h
+# CHECK-NEXT: 1 4 0.50 uaddlv h0, v0.8b
+# CHECK-NEXT: 1 4 1.00 uaddlv h0, v0.16b
# CHECK-NEXT: 1 2 0.25 uaddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: 1 2 0.25 uaddw v0.4s, v0.4s, v0.4h
# CHECK-NEXT: 1 2 0.25 uaddw v0.8h, v0.8h, v0.8b
@@ -2212,16 +2212,16 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddw2 v0.8h, v0.8h, v0.16b
# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14
# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14, #64
-# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13
-# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13, #32
+# CHECK-NEXT: 1 3 0.50 ucvtf s22, s13
+# CHECK-NEXT: 1 3 0.50 ucvtf s22, s13, #32
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s, #3
-# CHECK-NEXT: 2 4 1.00 ucvtf v0.4h, v0.4h
-# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s
-# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s, #3
-# CHECK-NEXT: 4 6 1.00 ucvtf v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 ucvtf v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 ucvtf v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 6 2.00 ucvtf v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.8b
# CHECK-NEXT: 1 3 0.25 udot v0.4s, v0.16b, v0.16b
@@ -2235,10 +2235,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 umaxp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 umaxp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 umaxp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 2 4 0.50 umaxv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 umaxv b0, v0.16b
+# CHECK-NEXT: 1 4 0.50 umaxv b0, v0.8b
+# CHECK-NEXT: 1 4 1.00 umaxv b0, v0.16b
# CHECK-NEXT: 1 2 0.50 umaxv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 umaxv h0, v0.8h
+# CHECK-NEXT: 1 4 0.50 umaxv h0, v0.8h
# CHECK-NEXT: 1 2 0.50 umaxv s0, v0.4s
# CHECK-NEXT: 1 2 0.25 umin v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 umin v0.4h, v0.4h, v0.4h
@@ -2246,10 +2246,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 uminp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.25 uminp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: 1 2 0.25 uminp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: 2 4 0.50 uminv b0, v0.8b
-# CHECK-NEXT: 2 4 1.00 uminv b0, v0.16b
+# CHECK-NEXT: 1 4 0.50 uminv b0, v0.8b
+# CHECK-NEXT: 1 4 1.00 uminv b0, v0.16b
# CHECK-NEXT: 1 2 0.50 uminv h0, v0.4h
-# CHECK-NEXT: 2 4 0.50 uminv h0, v0.8h
+# CHECK-NEXT: 1 4 0.50 uminv h0, v0.8h
# CHECK-NEXT: 1 2 0.50 uminv s0, v0.4s
# CHECK-NEXT: 1 4 0.50 umlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 4 0.50 umlal v0.4s, v0.4h, v0.4h
@@ -2264,10 +2264,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 umlsl2 v0.4s, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 umlsl2 v0.8h, v0.16b, v0.16b
# CHECK-NEXT: 1 3 0.25 ummla v0.4s, v0.16b, v0.16b
-# CHECK-NEXT: 1 2 0.25 umov w0, v0.b[1]
-# CHECK-NEXT: 1 2 0.25 umov w0, v0.h[1]
-# CHECK-NEXT: 1 2 0.25 mov w0, v0.s[1]
-# CHECK-NEXT: 1 2 0.25 mov x0, v0.d[1]
+# CHECK-NEXT: 1 2 1.00 umov w0, v0.b[1]
+# CHECK-NEXT: 1 2 1.00 umov w0, v0.h[1]
+# CHECK-NEXT: 1 2 1.00 mov w0, v0.s[1]
+# CHECK-NEXT: 1 2 1.00 mov x0, v0.d[1]
# CHECK-NEXT: 1 3 0.50 umull v0.2d, v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 umull v0.4s, v0.4h, v0.4h
# CHECK-NEXT: 1 3 0.50 umull v0.8h, v0.8b, v0.8b
@@ -2282,9 +2282,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 uqrshl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 4 0.50 uqrshl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 4 0.50 uqrshl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 uqrshrn b10, h12, #5
-# CHECK-NEXT: 1 2 0.25 uqrshrn h12, s10, #14
-# CHECK-NEXT: 1 2 0.25 uqrshrn s10, d10, #25
+# CHECK-NEXT: 1 4 0.50 uqrshrn b10, h12, #5
+# CHECK-NEXT: 1 4 0.50 uqrshrn h12, s10, #14
+# CHECK-NEXT: 1 4 0.50 uqrshrn s10, d10, #25
# CHECK-NEXT: 1 4 0.50 uqrshrn v0.2s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 uqrshrn v0.4h, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 uqrshrn v0.8b, v0.8h, #3
@@ -2308,9 +2308,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 uqshl v0.8b, v0.8b, #3
# CHECK-NEXT: 1 4 0.50 uqshl v0.8h, v0.8h, #3
# CHECK-NEXT: 1 4 0.50 uqshl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 uqshrn b12, h10, #7
-# CHECK-NEXT: 1 2 0.25 uqshrn h10, s14, #5
-# CHECK-NEXT: 1 2 0.25 uqshrn s10, d12, #13
+# CHECK-NEXT: 1 4 0.50 uqshrn b12, h10, #7
+# CHECK-NEXT: 1 4 0.50 uqshrn h10, s14, #5
+# CHECK-NEXT: 1 4 0.50 uqshrn s10, d12, #13
# CHECK-NEXT: 1 4 0.50 uqshrn v0.2s, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 uqshrn v0.4h, v0.4s, #3
# CHECK-NEXT: 1 4 0.50 uqshrn v0.8b, v0.8h, #3
@@ -2329,7 +2329,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 uqxtn2 v0.4s, v0.2d
# CHECK-NEXT: 1 4 0.50 uqxtn2 v0.8h, v0.4s
# CHECK-NEXT: 1 3 0.50 urecpe v0.2s, v0.2s
-# CHECK-NEXT: 1 4 0.50 urecpe v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 urecpe v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 urhadd v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 1 2 0.25 urhadd v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 urhadd v0.8h, v0.8h, v0.8h
@@ -2338,7 +2338,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 urshl v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 4 0.50 urshl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1 4 0.50 urshl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 urshr d20, d23, #31
+# CHECK-NEXT: 1 4 0.50 urshr d20, d23, #31
# CHECK-NEXT: 1 4 0.50 urshr v0.16b, v0.16b, #3
# CHECK-NEXT: 1 4 0.50 urshr v0.2d, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 urshr v0.2s, v0.2s, #3
@@ -2347,8 +2347,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 urshr v0.8b, v0.8b, #3
# CHECK-NEXT: 1 4 0.50 urshr v0.8h, v0.8h, #3
# CHECK-NEXT: 1 3 0.50 ursqrte v0.2s, v0.2s
-# CHECK-NEXT: 1 4 0.50 ursqrte v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 ursra d18, d10, #13
+# CHECK-NEXT: 1 4 1.00 ursqrte v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 ursra d18, d10, #13
# CHECK-NEXT: 1 4 0.50 ursra v0.16b, v0.16b, #3
# CHECK-NEXT: 1 4 0.50 ursra v0.2d, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 ursra v0.2s, v0.2s, #3
@@ -2366,7 +2366,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 ushl v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 ushll v0.4s, v0.4h, #3
# CHECK-NEXT: 1 2 0.50 ushll2 v0.8h, v0.16b, #3
-# CHECK-NEXT: 1 2 0.25 ushr d10, d17, #18
+# CHECK-NEXT: 1 2 0.50 ushr d10, d17, #18
# CHECK-NEXT: 1 2 0.50 ushr v0.16b, v0.16b, #3
# CHECK-NEXT: 1 2 0.50 ushr v0.2d, v0.2d, #3
# CHECK-NEXT: 1 2 0.50 ushr v0.2s, v0.2s, #3
@@ -2375,11 +2375,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 ushr v0.8b, v0.8b, #3
# CHECK-NEXT: 1 2 0.50 ushr v0.8h, v0.8h, #3
# CHECK-NEXT: 1 3 0.25 usmmla v0.4s, v0.16b, v0.16b
-# CHECK-NEXT: 1 2 0.25 smov w0, v0.b[1]
-# CHECK-NEXT: 1 2 0.25 smov w0, v0.h[1]
-# CHECK-NEXT: 1 2 0.25 smov x0, v0.b[1]
-# CHECK-NEXT: 1 2 0.25 smov x0, v0.h[1]
-# CHECK-NEXT: 1 2 0.25 smov x0, v0.s[1]
+# CHECK-NEXT: 1 2 1.00 smov w0, v0.b[1]
+# CHECK-NEXT: 1 2 1.00 smov w0, v0.h[1]
+# CHECK-NEXT: 1 2 1.00 smov x0, v0.b[1]
+# CHECK-NEXT: 1 2 1.00 smov x0, v0.h[1]
+# CHECK-NEXT: 1 2 1.00 smov x0, v0.s[1]
# CHECK-NEXT: 1 2 0.25 usqadd b19, b14
# CHECK-NEXT: 1 2 0.25 usqadd d18, d22
# CHECK-NEXT: 1 2 0.25 usqadd h20, h15
@@ -2391,7 +2391,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 usqadd v0.4s, v0.4s
# CHECK-NEXT: 1 2 0.25 usqadd v0.8b, v0.8b
# CHECK-NEXT: 1 2 0.25 usqadd v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 usra d20, d13, #61
+# CHECK-NEXT: 1 4 0.50 usra d20, d13, #61
# CHECK-NEXT: 1 4 0.50 usra v0.16b, v0.16b, #3
# CHECK-NEXT: 1 4 0.50 usra v0.2d, v0.2d, #3
# CHECK-NEXT: 1 4 0.50 usra v0.2s, v0.2s, #3
@@ -2468,7 +2468,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11]
-# CHECK-NEXT: - - - - - - - 26.67 49.17 49.17 18.75 7.75 7.75 7.75 431.00 356.50 385.50 311.00
+# CHECK-NEXT: - - - - - - - 30.67 58.67 58.67 18.75 7.75 7.75 7.75 450.75 368.25 402.75 320.25
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions:
@@ -2492,8 +2492,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp v0.8b, v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 addv s0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 addv h0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 addv h0, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 addv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 addv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 addv b0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 addv b0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesd v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aese v0.16b, v0.16b
@@ -2501,8 +2501,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesmc v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and v0.8b, v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - bfcvt h0, s0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - bfcvtn v0.4h, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - bfcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - bfcvtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - bfcvtn2 v0.8h, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot v0.2s, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot v0.4s, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb v0.4s, v0.8h, v0.8h
@@ -2618,86 +2618,86 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt s10, s11, #0.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt v0.4s, v0.4s, #0.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas h12, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtas v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau h12, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtau v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtl v0.2d, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtl v0.4s, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtl2 v0.2d, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtl2 v0.4s, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms s22, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms h22, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtms v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu h12, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtmu v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtn v0.2s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtn v0.4h, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtn2 v0.4s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtn2 v0.8h, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns s22, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns h22, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtns v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu h12, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtnu v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps s22, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps h22, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtps v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu h12, h13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtpu v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxn s22, d13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxn v0.2s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxn2 v0.4s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs d21, d12, #1
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs s21, s12, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs h21, h14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs h21, h14
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs h21, h12, #1
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s
@@ -2705,13 +2705,13 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d12, #1
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu s21, s12, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu h12, h13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu h21, h12, #1
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s
@@ -2719,12 +2719,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fdiv v0.2d, v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - 7.00 - fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 - 2.50 - fdiv v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fdiv v0.4h, v0.4h, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fdiv v0.4s, v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 - 2.50 - fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.50 - 4.50 - fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 6.50 - 6.50 - fdiv v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax v0.2d, v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax v0.4s, v0.4s, v0.4s
@@ -2794,11 +2794,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe d13, d13
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe s19, s14
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe v0.4h, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frecpe v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps d22, d30, d21
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps s21, s16, s13
@@ -2808,52 +2808,52 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frinta v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinta v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinta v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinta v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frinta v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frinti v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frinti v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinti v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinti v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinti v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frinti v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintm v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintm v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintm v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintm v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintm v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frintm v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintn v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintn v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintn v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintn v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintn v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frintn v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintp v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintp v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintp v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintp v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintp v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frintp v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintx v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintx v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintx v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintx v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintx v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frintx v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintz v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frintz v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintz v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintz v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintz v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frintz v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte d21, d12
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte s22, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte v0.4h, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - frsqrte v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts d8, d22, d18
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts s21, s5, s12
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts v0.2d, v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fsqrt v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fsqrt v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - 7.50 - 7.50 - fsqrt v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 - 2.50 - fsqrt v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fsqrt v0.4h, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.50 - 3.50 - fsqrt v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 5.50 - 5.50 - fsqrt v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.50 - 4.50 - fsqrt v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 6.50 - 6.50 - fsqrt v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - - - - - ld1 { v0.16b }, [x0]
# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - ld1 { v0.8h, v1.8h }, [sp], #32
@@ -2872,29 +2872,29 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1r { v0.4s }, [x15]
# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 ld1r { v0.2d }, [x15], x16
# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld2 { v0.16b, v1.16b }, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2 { v0.8b, v1.8b }, [x0], #16
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 0.50 0.50 ld2 { v0.h, v1.h }[7], [x15]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2 { v0.h, v1.h }[7], [x15], x8
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 0.50 0.50 ld2r { v0.8b, v1.8b }, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2r { v0.4h, v1.4h }, [x0], #4
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 0.50 0.50 ld2r { v0.2s, v1.2s }, [sp]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2r { v0.1d, v1.1d }, [sp], x8
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.75 0.75 0.75 0.75 ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 ld2 { v0.8b, v1.8b }, [x0], #16
+# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2 { v0.h, v1.h }[7], [x15], x8
+# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld2r { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2r { v0.4h, v1.4h }, [x0], #4
+# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld2r { v0.2s, v1.2s }, [sp]
+# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.50 0.50 0.50 0.50 ld2r { v0.1d, v1.1d }, [sp], x8
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld3 { v0.4h, v1.4h, v2.4h }, [x15]
# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3 { v0.8h, v1.8h, v2.8h }, [x15], #48
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.75 0.75 0.75 0.75 ld3 { v0.s, v1.s, v2.s }[3], [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.75 0.75 0.75 0.75 ld3r { v0.8b, v1.8b, v2.8b }, [x15]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.75 0.75 0.75 0.75 ld3r { v0.2s, v1.2s, v2.2s }, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3r { v0.1d, v1.1d, v2.1d }, [x0], x0
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 1.00 1.00 1.00 1.00 ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
-# CHECK-NEXT: - - - - - - - 1.33 1.33 1.33 0.25 0.25 0.25 0.25 1.00 1.00 1.00 1.00 ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 1.00 1.00 1.00 1.00 ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 1.00 1.00 ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 1.00 1.00 1.00 1.00 ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 1.00 1.00 1.00 1.00 ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 1.00 1.00 ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 1.00 1.00 ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], x8
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld3r { v0.8b, v1.8b, v2.8b }, [x15]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld3r { v0.2s, v1.2s, v2.2s }, [x0]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld3r { v0.1d, v1.1d, v2.1d }, [x0], x0
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 0.25 0.25 0.25 0.25 1.50 1.50 1.50 1.50 ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
+# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.75 0.75 0.75 0.75 ld4r { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], x8
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - mla v0.8b, v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - mls v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov b0, v0.b[15]
@@ -3003,8 +3003,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.8h, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 saddlv d0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 saddlv s0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 saddlv s0, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 saddlv h0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 saddlv s0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 saddlv h0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 saddlv h0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw v0.4s, v0.4s, v0.4h
@@ -3014,8 +3014,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.8h, v0.8h, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf d21, d12
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf d21, d12, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf s22, s13, #32
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s
@@ -3023,13 +3023,13 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - scvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.4s, v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.4s, v0.16b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shadd v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shl d7, d10, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 shl d7, d10, #12
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 shl v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 shl v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 shl v0.4h, v0.4h, #3
@@ -3054,7 +3054,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 shrn2 v0.8h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub v0.4h, v0.4h, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli d10, d14, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sli d10, d14, #12
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sli v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sli v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sli v0.2s, v0.2s, #3
@@ -3068,10 +3068,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 smaxv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 smaxv b0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 smaxv b0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 smaxv h0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 smaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 smaxv h0, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 smaxv s0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin v0.16b, v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin v0.4s, v0.4s, v0.4s
@@ -3079,10 +3079,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 sminv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sminv b0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sminv b0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sminv h0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 sminv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sminv h0, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sminv s0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - smlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - smlal v0.4s, v0.4h, v0.4h
@@ -3140,9 +3140,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull d1, s1, v0.s[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdmull d15, s22, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull d15, s22, s12
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull s1, h1, v0.h[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdmull s12, h22, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull s12, h22, h12
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull2 v0.2d, v0.4s, v0.4s
@@ -3193,18 +3193,18 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshl v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshl v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshl v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn b10, h13, #2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn h15, s10, #6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn s15, d12, #9
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn b10, h13, #2
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn h15, s10, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn s15, d12, #9
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn v0.2s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn v0.4h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn v0.8b, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn2 v0.16b, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn2 v0.4s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrn2 v0.8h, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun b17, h10, #6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun h10, s13, #15
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun s22, d16, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrun b17, h10, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrun h10, s13, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrun s22, d16, #31
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrun v0.2s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrun v0.4h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqrshrun v0.8b, v0.8h, #3
@@ -3238,24 +3238,24 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshlu v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshlu v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshlu v0.8h, v0.8h, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn b10, h15, #5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn h17, s10, #4
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn s18, d10, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn b10, h15, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn h17, s10, #4
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn s18, d10, #31
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn v0.2s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn v0.4h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn v0.8b, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn2 v0.16b, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn2 v0.4s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrn2 v0.8h, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun b15, h10, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun h20, s14, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun s10, d15, #15
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun v0.2s, v0.2d, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun v0.4h, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun v0.8b, v0.8h, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun2 v0.16b, v0.8h, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun2 v0.4s, v0.2d, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun b15, h10, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun h20, s14, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun s10, d15, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sqshrun2 v0.8h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub s20, s10, s7
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub v0.2d, v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub v0.4s, v0.4s, v0.4s
@@ -3281,7 +3281,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri d10, d12, #14
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sri d10, d12, #14
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sri v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sri v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sri v0.2s, v0.2s, #3
@@ -3293,7 +3293,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshl v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshl v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshl v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr d19, d18, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr d19, d18, #7
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr v0.2s, v0.2s, #3
@@ -3301,7 +3301,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srshr v0.8h, v0.8h, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra d15, d11, #19
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srsra d15, d11, #19
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srsra v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srsra v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 srsra v0.2s, v0.2s, #3
@@ -3316,7 +3316,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshl v0.8b, v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshll v0.2d, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshll2 v0.4s, v0.8h, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr d15, d16, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr d15, d16, #12
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr v0.2s, v0.2s, #3
@@ -3324,7 +3324,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 sshr v0.8h, v0.8h, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra d18, d12, #21
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ssra d18, d12, #21
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ssra v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ssra v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ssra v0.2s, v0.2s, #3
@@ -3362,9 +3362,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - 1.50 1.50 0.25 0.25 0.25 0.25 1.50 1.50 - - st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - st3 { v0.h, v1.h, v2.h }[7], [x15]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 - - st3 { v0.h, v1.h, v2.h }[7], [x15], #6
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 0.25 0.25 0.25 0.25 3.00 3.00 - - st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
-# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 - - - - 0.75 0.75 0.75 0.75 st4 { v0.b, v1.b, v2.b, v3.b }[15], [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - 6.00 6.00 0.25 0.25 0.25 0.25 6.00 6.00 - - st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - st4 { v0.b, v1.b, v2.b, v3.b }[15], [x0]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 0.25 0.25 0.25 0.25 1.00 1.00 - - st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub d15, d5, d16
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub v0.2d, v0.2d, v0.2d
@@ -3381,19 +3381,19 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbl v0.16b, { v0.16b }, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 - - tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbl v0.8b, { v0.16b }, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 - - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbx v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbx v0.16b, { v0.16b }, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbx v0.16b, { v0.16b, v1.16b }, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 - - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbx v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbx v0.8b, { v0.16b }, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 - - tbx v0.8b, { v0.16b, v1.16b }, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 - - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
@@ -3445,8 +3445,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.8h, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uaddlv d0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uaddlv s0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uaddlv s0, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uaddlv h0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uaddlv s0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uaddlv h0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uaddlv h0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw v0.2d, v0.2d, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw v0.4s, v0.4s, v0.4h
@@ -3456,8 +3456,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.8h, v0.8h, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf d21, d14
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf d21, d14, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf s22, s13, #32
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s
@@ -3465,7 +3465,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - ucvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.4s, v0.16b, v0.16b
@@ -3479,10 +3479,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp v0.16b, v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 umaxv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 umaxv b0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 umaxv b0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 umaxv h0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 umaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 umaxv h0, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 umaxv s0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin v0.4h, v0.4h, v0.4h
@@ -3490,10 +3490,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uminv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uminv b0, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uminv b0, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uminv h0, v0.4h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uminv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uminv h0, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uminv s0, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umlal v0.4s, v0.4h, v0.4h
@@ -3508,10 +3508,10 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl2 v0.4s, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl2 v0.8h, v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ummla v0.4s, v0.16b, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umov w0, v0.b[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umov w0, v0.h[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov w0, v0.s[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov x0, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 umov w0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 umov w0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 mov w0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 mov x0, v0.d[1]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umull v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umull v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - umull v0.8h, v0.8b, v0.8b
@@ -3526,9 +3526,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn b10, h12, #5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn h12, s10, #14
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn s10, d10, #25
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshrn b10, h12, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshrn h12, s10, #14
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshrn s10, d10, #25
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshrn v0.2s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshrn v0.4h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqrshrn v0.8b, v0.8h, #3
@@ -3552,9 +3552,9 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshl v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshl v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn b12, h10, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn h10, s14, #5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn s10, d12, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshrn b12, h10, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshrn h10, s14, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshrn s10, d12, #13
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshrn v0.2s, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshrn v0.4h, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqshrn v0.8b, v0.8h, #3
@@ -3573,7 +3573,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqxtn2 v0.4s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 uqxtn2 v0.8h, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - urecpe v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - urecpe v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - urecpe v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd v0.16b, v0.16b, v0.16b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd v0.8h, v0.8h, v0.8h
@@ -3582,7 +3582,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshl v0.2d, v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr d20, d23, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshr d20, d23, #31
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshr v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshr v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshr v0.2s, v0.2s, #3
@@ -3591,8 +3591,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshr v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 urshr v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ursqrte v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 - 0.50 - ursqrte v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra d18, d10, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ursqrte v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ursra d18, d10, #13
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ursra v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ursra v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ursra v0.2s, v0.2s, #3
@@ -3610,7 +3610,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushl v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushll v0.4s, v0.4h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushll2 v0.8h, v0.16b, #3
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr d10, d17, #18
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushr d10, d17, #18
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushr v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushr v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushr v0.2s, v0.2s, #3
@@ -3619,11 +3619,11 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushr v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 ushr v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usmmla v0.4s, v0.16b, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smov w0, v0.b[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smov w0, v0.h[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smov x0, v0.b[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smov x0, v0.h[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smov x0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 smov w0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 smov w0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 smov x0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 smov x0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 smov x0, v0.s[1]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd b19, b14
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd d18, d22
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd h20, h15
@@ -3635,7 +3635,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.8b, v0.8b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra d20, d13, #61
+# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 usra d20, d13, #61
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 usra v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 usra v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - - - 0.50 - 0.50 usra v0.2s, v0.2s, #3
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
new file mode 100644
index 000000000000000..9097ec650f2c0ee
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
@@ -0,0 +1,7588 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 -scheduling-info < %s | FileCheck %s
+
+ .text
+ .file "V1-scheduling-info.s"
+ .globl test
+ .p2align 4
+ .type test,@function
+test:
+ .cfi_startproc
+ abs D15, D11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
+ abs V25.2S, V25.2S // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ abs Z26.B, P6/M, Z27.B // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adc W13, W6, W4 // ADC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ adc X8, X12, X10 // ADC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ adcs W29, W7, W30 // ADCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adcs X11, X3, X5 // ADCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ add WSP, WSP, W10 // ADD <Wd|WSP>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, W2, UXTB // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, W13, UXTH #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, W13, LSL #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ add X22, X2, X27 // ADD <Xd|SP>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X25, X9, W25, UXTB // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X4, X28, W3, UXTB #3 // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+ add X0, X28, X26, LSL #3 // ADD <Xd|SP>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, #3765 // ADD <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add WSP, WSP, #3547, LSL #12 // ADD <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X7, X30, #803 // ADD <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add X7, X2, #319, LSL #12 // ADD <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ add Z13.D, Z13.D, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add Z16.D, Z16.D, #233, LSL #8 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add W3, W2, W21, LSL #3 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ add W6, W21, W17, LSL #15 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ add W28, W30, W19, ASR #30 // ADD <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ add X8, X3, X28, LSL #3 // ADD <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ add X12, X13, X0, LSL #44 // ADD <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ add X5, X20, X28, LSR #16 // ADD <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ add D0, D23, D21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ add V19.4S, V24.4S, V15.4S // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ add Z29.D, P5/M, Z29.D, Z29.D // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add Z10.H, Z22.H, Z13.H // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ addhn V26.4H, V5.4S, V9.4S // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ addhn2 V1.16B, V19.8H, V6.8H // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ addp D1, V14.2D // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ addp V7.2S, V1.2S, V2.2S // ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ addpl X27, X6, #-6 // ADDPL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ adds W17, WSP, W25 // ADDS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W6, WSP, W15, UXTH // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W22, WSP, W30, UXTB #2 // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W12, WSP, W29, LSL #4 // ADDS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds X14, X0, X10 // ADDS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds X13, X23, W8, UXTB // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds X4, X26, W28, UXTB #1 // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ adds X10, X3, X29, LSL #2 // ADDS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W23, WSP, #502 // ADDS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W2, WSP, #2980, LSL #12 // ADDS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ adds X12, X4, #1345 // ADDS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds X25, X18, #3037, LSL #12 // ADDS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ adds W12, W13, W26 // ADDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W0, W23, W20, LSL #0 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds W13, W16, W12, LSL #28 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ adds W20, W19, W16, ASR #0 // ADDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ adds X23, X12, X4 // ADDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds X0, X13, X4, LSL #2 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ adds X4, X7, X6, LSL #31 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ adds X9, X8, X9, ASR #41 // ADDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ addv B0, V28.8B // ADDV B<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ addv B1, V26.16B // ADDV B<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ addv H18, V13.4H // ADDV H<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ addv H29, V17.8H // ADDV H<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ addv S22, V18.4S // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ addvl X1, X27, #-8 // ADDVL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ adr X3, test // ADR <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+ adr Z26.D, [Z1.D, Z8.D] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z22.S, [Z28.S, Z8.S, LSL #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z11.D, [Z2.D, Z29.D, SXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z3.D, [Z9.D, Z9.D, SXTW #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z6.D, [Z7.D, Z13.D, UXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z4.D, [Z24.D, Z22.D, UXTW #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adrp X0, test // ADRP <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+ and WSP, W16, #0xe00 // AND <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ and X2, X22, #0x1e00 // AND <Xd|SP>, <Xn>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ and Z1.B, Z1.B, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z7.H, Z7.H, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z7.S, Z7.S, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z7.D, Z7.D, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and P5.B, P1/Z, P6.B, P4.B // AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ and W11, W14, W24 // AND <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and W2, W21, W22, LSR #25 // AND <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and X1, X20, X29 // AND <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and X8, X11, X22, ASR #56 // AND <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ and V29.8B, V26.8B, V26.8B // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ and Z17.D, P6/M, Z17.D, Z12.D // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z9.D, Z5.D, Z17.D // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ ands W14, W8, #0x70 // ANDS <Wd>, <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ ands X4, X10, #0x60 // ANDS <Xd>, <Xn>, #<immd> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ ands W29, W28, W12 // ANDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ ands W7, W13, W23, ASR #3 // ANDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ ands X21, X9, X6 // ANDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ ands X10, X27, X7, ASR #20 // ANDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ ands P5.B, P1/Z, P2.B, P7.B // ANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ andv H7, P6, Z31.H // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+ asr W30, W14, #5 // ASR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ asr X12, X21, #28 // ASR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ asr Z7.B, P5/M, Z7.B, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z6.H, P6/M, Z6.H, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z28.S, P0/M, Z28.S, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z26.D, P5/M, Z26.D, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z10.B, Z14.B, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z23.H, Z18.H, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z29.S, Z11.S, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z20.D, Z26.D, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr W3, W0, W20 // ASR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ asr X7, X5, X21 // ASR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ asr Z3.S, P0/M, Z3.S, Z10.S // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z9.S, P2/M, Z9.S, Z8.D // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z26.S, Z21.S, Z21.D // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asrd Z6.B, P4/M, Z6.B, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrd Z19.H, P3/M, Z19.H, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrd Z16.S, P3/M, Z16.S, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrd Z9.D, P6/M, Z9.D, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+ asrr Z0.B, P0/M, Z0.B, Z19.B // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asrv W24, W28, W13 // ASRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ asrv X3, X21, X24 // ASRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ at s12e1r, X28 // AT <at_op>, <Xt> \\ No description \\ No scheduling info
+ b test // B <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.eq test // B.eq <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.none test // B.none <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.ne test // B.ne <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.any test // B.any <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.cs test // B.cs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.hs test // B.hs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.nlast test // B.nlast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.cc test // B.cc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.lo test // B.lo <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.last test // B.last <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.mi test // B.mi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.first test // B.first <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.pl test // B.pl <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.nfrst test // B.nfrst <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.vs test // B.vs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.vc test // B.vc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.hi test // B.hi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.pmore test // B.pmore <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.ls test // B.ls <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.plast test // B.plast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.ge test // B.ge <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.tcont test // B.tcont <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.lt test // B.lt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.tstop test // B.tstop <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.gt test // B.gt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.le test // B.le <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.al test // B.al <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ b.nv test // B.nv <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+ bfcvt H6, S20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
+ bfcvt Z16.H, P6/M, Z1.S // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+ bfcvtn V12.4H, V15.4S // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
+ bfcvtn2 V15.8H, V13.4S // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
+ bfcvtnt Z11.H, P7/M, Z24.S // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+ bfdot V0.2S, V24.4H, V14.2H[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+ bfdot Z24.S, Z26.H, Z2.H[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+ bfdot V31.4S, V21.8H, V14.8H // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+ bfdot Z15.S, Z3.H, Z7.H // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+ bfi W10, W26, #31, #1 // BFI <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+ bfi X25, X7, #8, #1 // BFI <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+ bfm W30, W26, #14, #12 // BFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+ bfm X15, X20, #0, #35 // BFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+ bfmlalb Z13.S, Z30.H, Z0.H[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalb Z3.S, Z14.H, Z13.H // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalb V22.4S, V11.8H, V11.H[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalt V17.4S, V4.8H, V11.H[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalb V13.4S, V5.8H, V17.8H // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalt V10.4S, V16.8H, V1.8H // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+ bfmlalt Z23.S, Z3.H, Z2.H[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalt Z25.S, Z21.H, Z22.H // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmmla V15.4S, V28.8H, V23.8H // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
+ bfmmla Z26.S, Z2.H, Z12.H // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 1 5 3 2.0 V1UnitV01
+ bfxil W27, W23, #14, #14 // BFXIL <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+ bfxil X0, X5, #11, #22 // BFXIL <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+ bic Z28.B, Z28.B, #0x70 // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z18.H, Z18.H, #0x60 // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z12.S, Z12.S, #0x2 // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z6.D, Z6.D, #0x4 // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic P4.B, P4/Z, P6.B, P0.B // BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ bic W0, W26, W22 // BIC <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic W23, W10, W7, LSL #11 // BIC <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic X21, X20, X14 // BIC <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic X21, X3, X17, LSR #35 // BIC <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ bic V6.4H, #217 // BIC <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V23.8H, #101, LSL #0 // BIC <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V24.2S, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V31.2S, #192, LSL #0 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic V25.16B, V10.16B, V9.16B // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ bic Z15.D, P4/M, Z15.D, Z25.D // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z7.D, Z8.D, Z28.D // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bics W24, W1, W25 // BICS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ bics W21, W0, W24, LSL #11 // BICS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ bics X27, X25, X10 // BICS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ bics X22, X6, X27, LSL #62 // BICS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ bics P2.B, P4/Z, P1.B, P7.B // BICS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ bif V0.8B, V25.8B, V4.8B // BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+ bit V5.8B, V12.8B, V22.8B // BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+ bl test // BL <label> \\ Branch and link, immed \\ 2 1 1 2.0 V1UnitB,V1UnitS
+ blr X11 // BLR <Xn> \\ Branch and link, register \\ 2 1 1 2.0 V1UnitB,V1UnitS
+ br X17 // BR <Xn> \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+ brk #33813 // BRK #<imm> \\ No description \\ No scheduling info
+ brka P7.B, P7/Z, P5.B // BRKA <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkas P6.B, P5/Z, P0.B // BRKAS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+ brkb P5.B, P0/Z, P1.B // BRKB <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkbs P6.B, P1/Z, P4.B // BRKBS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+ brkn P7.B, P0/Z, P6.B, P7.B // BRKN <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkns P3.B, P1/Z, P7.B, P3.B // BRKNS <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+ brkpa P3.B, P5/Z, P0.B, P1.B // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkpas P2.B, P5/Z, P1.B, P3.B // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+ brkpb P1.B, P0/Z, P7.B, P6.B // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+ brkpbs P7.B, P1/Z, P6.B, P1.B // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+ bsl V27.16B, V13.16B, V21.16B // BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+ cbnz W21, test // CBNZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ cbnz X26, test // CBNZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ cbz W6, test // CBZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ cbz X4, test // CBZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ ccmn W8, #14, #3, HS // CCMN <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmn X23, #17, #0, GT // CCMN <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmn W17, W18, #12, CS // CCMN <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmn X19, X29, #12, LAST // CCMN <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp W24, #2, #5, NLAST // CCMP <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp X12, #8, #2, LO // CCMP <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp W2, W9, #3, TSTOP // CCMP <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ ccmp X11, X10, #13, LS // CCMP <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+ cinc W23, W5, TSTOP // CINC <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+ cinc X2, X1, NFRST // CINC <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+ cinv W9, W12, TCONT // CINV <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+ cinv X9, X30, FIRST // CINV <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+ clasta B11, P4, B11, Z21.B // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clasta W8, P0, W8, Z6.B // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+ clasta Z25.S, P1, Z25.S, Z14.S // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clastb D6, P7, D6, Z31.D // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clastb W28, P6, W28, Z12.B // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+ clastb Z27.H, P6, Z27.H, Z22.H // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clrex // CLREX \\ No description \\ No scheduling info
+ clrex #12 // CLREX #<imm> \\ No description \\ No scheduling info
+ cls V5.8B, V22.8B // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+ cls W25, W0 // CLS <Wd>, <Wn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+ cls X22, X6 // CLS <Xd>, <Xn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+ cls Z28.D, P3/M, Z2.D // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ clz V24.8H, V30.8H // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+ clz W26, W27 // CLZ <Wd>, <Wn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+ clz X4, X0 // CLZ <Xd>, <Xn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+ clz Z3.S, P3/M, Z18.S // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ cmeq D26, D5, D25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmeq V9.8H, V16.8H, V24.8H // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmeq D7, D26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmeq V14.4H, V18.4H, #0 // CMEQ <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge D26, D21, D28 // CMGE <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge V22.8H, V16.8H, V3.8H // CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge D30, D12, #0 // CMGE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmge V22.16B, V30.16B, #0 // CMGE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt D23, D25, D12 // CMGT <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt V3.2D, V29.2D, V11.2D // CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt D28, D14, #0 // CMGT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmgt V22.2S, V10.2S, #0 // CMGT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhi D29, D16, D5 // CMHI <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhi V28.4H, V25.4H, V21.4H // CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhs D5, D3, D12 // CMHS <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmhs V6.8B, V31.8B, V12.8B // CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmle D14, D21, #0 // CMLE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmle V21.2S, V19.2S, #0 // CMLE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmlt D21, D24, #0 // CMLT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmlt V26.4H, V12.4H, #0 // CMLT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmn WSP, W7 // CMN <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn WSP, W8, SXTB // CMN <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn WSP, W3, UXTB #3 // CMN <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn WSP, W7, LSL #3 // CMN <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn X2, X28 // CMN <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn X3, W0, UXTB // CMN <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn X0, W4, UXTB #3 // CMN <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmn X14, X26, LSL #2 // CMN <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn WSP, #613 // CMN <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn WSP, #2991, LSL #12 // CMN <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmn X23, #3803 // CMN <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn X29, #3786, LSL #12 // CMN <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmn W12, W0 // CMN <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn W19, W27, LSL #1 // CMN <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn W2, W11, LSL #29 // CMN <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmn W0, W0, ASR #30 // CMN <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmn X23, X28 // CMN <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn X6, X1, LSL #2 // CMN <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmn X28, X30, LSL #26 // CMN <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmn X25, X15, LSR #49 // CMN <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp WSP, W26 // CMP <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp WSP, W13, SXTH // CMP <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp WSP, W12, SXTH #3 // CMP <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp WSP, W30, LSL #4 // CMP <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp X22, X18 // CMP <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp X16, W27, UXTB // CMP <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp X16, W7, UXTB #4 // CMP <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp X21, X24, LSL #4 // CMP <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp WSP, #2342 // CMP <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp WSP, #3664, LSL #12 // CMP <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp X5, #1482 // CMP <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp X4, #3684, LSL #12 // CMP <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp W14, W0, LSL #4 // CMP <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp W0, W23, LSL #29 // CMP <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp W2, W28, LSR #20 // CMP <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp X27, X10, LSL #1 // CMP <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ cmp X18, X12, LSL #14 // CMP <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmp X6, X7, LSR #0 // CMP <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ cmpeq P2.H, P0/Z, Z26.H, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpge P1.B, P4/Z, Z28.B, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpgt P1.B, P0/Z, Z13.B, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphi P1.D, P3/Z, Z23.D, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphs P7.D, P5/Z, Z23.D, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmple P5.B, P2/Z, Z9.B, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplo P3.S, P5/Z, Z18.S, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpls P6.D, P6/Z, Z31.D, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplt P0.H, P6/Z, Z29.H, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpne P5.S, P4/Z, Z18.S, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpeq P6.S, P5/Z, Z2.S, Z9.S // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpge P7.S, P4/Z, Z15.S, Z15.S // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpgt P2.H, P4/Z, Z26.H, Z11.H // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphi P0.S, P4/Z, Z8.S, Z4.S // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphs P1.D, P6/Z, Z26.D, Z15.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpne P4.B, P3/Z, Z21.B, Z16.B // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpeq P2.D, P3/Z, Z13.D, Z18.D // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpge P2.B, P3/Z, Z3.B, Z16.D // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpgt P2.H, P2/Z, Z28.H, Z30.D // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphi P0.H, P5/Z, Z30.H, Z16.D // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmphs P7.H, P2/Z, Z1.H, Z26.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmple P7.B, P7/Z, Z3.B, Z13.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplo P6.D, P2/Z, Z16.D, Z16.D // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpls P3.H, P2/Z, Z12.H, Z26.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplt P0.D, P4/Z, Z29.D, Z26.D // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpne P0.S, P4/Z, Z30.S, Z8.D // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmple P1.D, P3/Z, Z2.D, Z26.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplo P7.B, P0/Z, Z4.B, Z25.B // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpls P4.D, P4/Z, Z2.D, Z14.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmplt P2.S, P2/Z, Z31.S, Z21.S // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmtst D10, D6, D5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cmtst V13.2D, V13.2D, V13.2D // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+ cneg W3, W17, HI // CNEG <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+ cneg X26, X8, LAST // CNEG <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+ cnot Z7.S, P7/M, Z8.S // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ cnt V12.16B, V14.16B // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+ cnt Z26.H, P0/M, Z27.H // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ cntb X18 // CNTB <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntb X9, VL128 // CNTB <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntb X28, VL8, MUL #13 // CNTB <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntd X20 // CNTD <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntd X27, VL7 // CNTD <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntd X8, VL7, MUL #2 // CNTD <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cnth X27 // CNTH <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cnth X0, VL1 // CNTH <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cnth X16, VL3, MUL #6 // CNTH <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntw X22 // CNTW <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntw X23, VL3 // CNTW <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntw X6, VL16, MUL #11 // CNTW <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ cntp X22, P1, P2.S // CNTP <Xd>, <Pg>, <Pn>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ compact Z17.S, P1, Z18.S // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ cpy Z13.B, P0/M, B6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z3.B, P6/M, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z11.S, P5/M, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z0.H, P0/M, #-11, LSL #0 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z5.B, P1/Z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z12.H, P1/Z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z25.D, P3/Z, #-81, LSL #8 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ cpy Z24.H, P0/M, W19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ cpy Z23.S, P2/M, WSP // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ crc32b W27, W12, W15 // CRC32B <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32h W3, W15, W21 // CRC32H <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32w W9, W18, W24 // CRC32W <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32x W19, W6, X25 // CRC32X <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32cb W25, W28, W30 // CRC32CB <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32ch W25, W26, W16 // CRC32CH <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32cw W27, W12, W23 // CRC32CW <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ crc32cx W21, W28, X5 // CRC32CX <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+ csdb // CSDB \\ No description \\ No scheduling info
+ csel W25, W16, W30, LS // CSEL <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csel X28, X1, X2, PL // CSEL <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ cset W6, NE // CSET <Wd>, <cond> \\ No description \\ No scheduling info
+ cset X11, LT // CSET <Xd>, <cond> \\ No description \\ No scheduling info
+ csetm W3, HI // CSETM <Wd>, <cond> \\ No description \\ No scheduling info
+ csetm X6, NE // CSETM <Xd>, <cond> \\ No description \\ No scheduling info
+ csinc W9, W3, W14, LT // CSINC <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csinc X20, X11, X23, TCONT // CSINC <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csinv W1, W4, W3, NLAST // CSINV <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csinv X27, X21, X15, NE // CSINV <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csneg W5, W13, W4, HI // CSNEG <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ csneg X8, X29, X29, PMORE // CSNEG <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+ ctermeq X4, X11 // CTERMEQ <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+ ctermne X0, X16 // CTERMNE <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+ dc CSW, X16 // DC <dc_op>, <Xt> \\ No description \\ No scheduling info
+ dcps1 // DCPS1 \\ No description \\ No scheduling info
+ dcps1 #4391 // DCPS1 #<imm> \\ No description \\ No scheduling info
+ dcps2 // DCPS2 \\ No description \\ No scheduling info
+ dcps2 #26756 // DCPS2 #<imm> \\ No description \\ No scheduling info
+ dcps3 // DCPS3 \\ No description \\ No scheduling info
+ dcps3 #47330 // DCPS3 #<imm> \\ No description \\ No scheduling info
+ decb X22 // DECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decb X5, VL256 // DECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decb X21, VL256, MUL #7 // DECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd X11 // DECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd X19, ALL // DECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd X24, VL2, MUL #10 // DECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ dech X16 // DECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ dech X20, MUL4 // DECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ dech X0, MUL3, MUL #15 // DECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decw X27 // DECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decw X18, VL32 // DECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decw X29, VL6, MUL #3 // DECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ decd Z19.D // DECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decd Z22.D, MUL3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decd Z1.D, VL128, MUL #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ dech Z23.H // DECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ dech Z29.H, VL5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ dech Z28.H, VL64, MUL #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decw Z8.S // DECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decw Z4.S, VL64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decw Z27.S, VL4, MUL #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decp X6, P6.B // DECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ decp Z22.H, P1 // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ dmb SY // DMB <option> \\ No description \\ No scheduling info
+ dmb #6 // DMB #<imm> \\ No description \\ No scheduling info
+ drps // DRPS \\ No description \\ No scheduling info
+ dup B15, V25.B[12] // DUP B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup H2, V31.H[5] // DUP H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup S10, V2.S[1] // DUP S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup D24, V7.D[1] // DUP D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V25.8B, V21.B[4] // DUP <Vd>.<Tb>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V28.8H, V29.H[1] // DUP <Vd>.<Th>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V24.4S, V9.S[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V20.2D, V3.D[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ dup V19.4S, W27 // DUP <Vd>.<T>, <R><n> \\ ASIMD duplicate, gen reg \\ 1 3 3 1.0 V1UnitM0
+ dup Z30.B, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z15.H, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z22.D, #-14, LSL #0 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z2.B, Z26.B[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z23.H, Z22.H[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z29.S, Z30.S[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z4.D, Z7.D[0] // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z25.D, X28 // DUP <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ dup Z18.S, WSP // DUP <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ dupm Z18.B, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z12.H, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z16.S, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z16.D, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ eon W29, W4, W19 // EON <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon W14, W24, W28, ASR #14 // EON <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon X19, X12, X2 // EON <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon X23, X23, X23, ASR #41 // EON <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eon Z7.B, Z7.B, #0x70 // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z3.H, Z3.H, #0x60 // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z2.S, Z2.S, #0x2 // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z24.D, Z24.D, #0x4 // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor WSP, W4, #0xe00 // EOR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ eor X27, X25, #0x1e00 // EOR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ eor Z19.B, Z19.B, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z18.H, Z18.H, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z10.S, Z10.S, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z29.D, Z29.D, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor P6.B, P7/Z, P3.B, P5.B // EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ eor W8, W27, W2 // EOR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor W8, W7, W29, ASR #30 // EOR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor X22, X16, X6 // EOR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor X0, X23, X30, LSL #11 // EOR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ eor V8.16B, V10.16B, V19.16B // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ eor Z8.H, P3/M, Z8.H, Z14.H // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z30.D, Z26.D, Z20.D // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eors P1.B, P0/Z, P3.B, P1.B // EORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ eorv H17, P1, Z15.H // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+ eret // ERET \\ No description \\ No scheduling info
+ esb // ESB \\ No description \\ No scheduling info
+ ext V12.8B, V22.8B, V31.8B, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+ ext V17.16B, V18.16B, V8.16B, #10 // EXT <Vd>.16B, <Vn>.16B, <Vm>.16B, #<index16> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+ extr W19, W20, W20, #16 // EXTR <Wd>, <Wn>, <Wn>, #<lsbs> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+ extr W27, W4, W5, #23 // EXTR <Wd>, <Wn>, <Wm>, #<lsbs> \\ Bitfield extract, two regs \\ 1 3 3 2.0 V1UnitM
+ extr X25, X22, X22, #62 // EXTR <Xd>, <Xn>, <Xn>, #<lsbd> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+ extr X0, X12, X13, #17 // EXTR <Xd>, <Xn>, <Xm>, #<lsbd> \\ Bitfield extract, two regs \\ 1 3 3 2.0 V1UnitM
+ fabd H27, H20, H17 // FABD <Hd>, <Hn>, <Hm> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd S16, S29, S6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd V13.8H, V28.8H, V12.8H // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd V12.4S, V4.4S, V31.4S // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabd Z11.H, P6/M, Z11.H, Z5.H // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+ fabs H25, H7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs S17, S12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs D30, D8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs V16.4S, V31.4S // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs V17.2S, V28.2S // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+ fabs Z26.S, P7/M, Z24.S // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+ facge P0.H, P5/Z, Z15.H, Z18.H // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ facgt P7.S, P7/Z, Z10.S, Z4.S // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ facge H24, H26, H29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facge D25, D24, D7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facge V25.4H, V16.4H, V11.4H // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facge V19.2S, V24.2S, V5.2S // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt H0, H4, H10 // FACGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt S29, S3, S2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt V22.8H, V14.8H, V31.8H // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facgt V22.4S, V8.4S, V2.4S // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ facle P7.H, P5/Z, Z22.H, Z27.H // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ faclt P5.H, P5/Z, Z31.H, Z16.H // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fadd Z4.H, P7/M, Z4.H, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fadd H23, H27, H22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd S1, S23, S27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd D16, D15, D21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd V7.2D, V30.2D, V20.2D // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd V16.2D, V13.2D, V11.2D // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fadd Z26.H, P4/M, Z26.H, Z1.H // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fadd Z23.S, Z7.S, Z16.S // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fadda H8, P3, H8, Z28.H // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 1 19 19 0.06 V1UnitV0[18]
+ fadda S11, P6, S11, Z1.S // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 1 11 11 0.1 V1UnitV0[10]
+ fadda D27, P4, D27, Z27.D // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 1 8 8 0.67 V1UnitV01[3]
+ faddp H10, V19.2H // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddp D11, V28.2D // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddp V16.2D, V11.2D, V5.2D // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddp V16.4S, V11.4S, V18.4S // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ faddv H21, P2, Z3.H // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+ faddv S16, P2, Z25.S // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+ faddv D18, P4, Z7.D // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fcadd Z29.H, P2/M, Z29.H, Z15.H, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 1 3 3 2.0 V1UnitV01
+ fccmp H31, H3, #11, HS // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmp S5, S6, #0, CC // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmp D17, D15, #0, ANY // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmpe H6, H1, #12, ANY // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmpe S16, S13, #10, VS // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fccmpe D17, D14, #15, PLAST // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq P7.D, P1/Z, Z23.D, Z21.D // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmge P6.H, P1/Z, Z19.H, Z10.H // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmgt P5.S, P2/Z, Z29.S, Z5.S // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmne P5.D, P0/Z, Z22.D, Z15.D // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmuo P0.D, P2/Z, Z15.D, Z23.D // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq P4.D, P5/Z, Z19.D, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmge P0.D, P5/Z, Z10.D, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmgt P6.D, P1/Z, Z8.D, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmle P2.D, P4/Z, Z26.D, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmlt P5.D, P5/Z, Z23.D, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmne P2.H, P3/Z, Z7.H, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq H30, H6, H1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq S17, S0, S21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V19.2S, V31.2S, V19.2S // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V12.4S, V11.4S, V26.4S // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq H19, H23, #0.0 // FCMEQ <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq S25, S18, #0.0 // FCMEQ <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V8.2S, V16.2S, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmeq V18.2D, V17.2D, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge H1, H16, H12 // FCMGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge D29, D9, D3 // FCMGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V20.8H, V19.8H, V22.8H // FCMGE <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V17.2D, V11.2D, V13.2D // FCMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge H10, H23, #0.0 // FCMGE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge D5, D17, #0.0 // FCMGE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V18.4H, V27.4H, #0.0 // FCMGE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmge V17.2S, V11.2S, #0.0 // FCMGE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt H4, H5, H0 // FCMGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt S13, S20, S3 // FCMGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V24.8H, V24.8H, V28.8H // FCMGT <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V19.4S, V20.4S, V13.4S // FCMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt H0, H18, #0.0 // FCMGT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt D30, D23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V0.8H, V11.8H, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmgt V19.2D, V31.2D, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmla Z20.H, Z12.H, Z4.H[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+ fcmla Z1.S, Z27.S, Z6.S[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+ fcmla Z25.S, P3/M, Z13.S, Z23.S, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+ fcmle P5.S, P3/Z, Z28.S, Z12.S // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmle H18, H28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmle D18, D16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmle V16.8H, V11.8H, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmle V22.4S, V30.4S, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt P1.S, P1/Z, Z13.S, Z24.S // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmlt H23, H7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt D22, D28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt V8.4H, V2.4H, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmlt V7.2D, V16.2D, #0.0 // FCMLT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+ fcmp H5, H21 // FCMP <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp H5, #0.0 // FCMP <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp S7, S0 // FCMP <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp S28, #0.0 // FCMP <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp D1, D27 // FCMP <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmp D16, #0.0 // FCMP <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe H22, H21 // FCMPE <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe H13, #0.0 // FCMPE <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe S11, S29 // FCMPE <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe S15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe D27, D22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcmpe D9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+ fcpy Z2.H, P7/M, #0.5 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fcsel H26, H2, H11, NLAST // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+ fcsel S5, S1, S4, VC // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+ fcsel D14, D0, D19, NONE // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+ fcvt S13, H13 // FCVT <Sd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt D10, H6 // FCVT <Dd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt H1, S1 // FCVT <Hd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt D9, S23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt H17, D16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt S31, D27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+ fcvt Z0.S, P1/M, Z4.H // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
+ fcvt Z6.D, P0/M, Z17.H // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z7.H, P7/M, Z5.S // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
+ fcvt Z11.D, P2/M, Z18.S // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z26.H, P0/M, Z30.D // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z13.S, P2/M, Z3.D // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvtas W23, H3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas X14, H29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas W0, S13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas X23, S15 // FCVTAS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas W1, D31 // FCVTAS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas X2, D3 // FCVTAS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtas H27, H24 // FCVTAS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtas S16, S0 // FCVTAS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtas D14, D7 // FCVTAS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtas V5.4H, V16.4H // FCVTAS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtas V13.8H, V30.8H // FCVTAS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtas V12.2S, V1.2S // FCVTAS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtas V9.4S, V31.4S // FCVTAS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtas V2.2D, V22.2D // FCVTAS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau W13, H27 // FCVTAU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau X8, H12 // FCVTAU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau W20, S10 // FCVTAU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau X27, S22 // FCVTAU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau W6, D26 // FCVTAU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau X16, D13 // FCVTAU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtau H6, H29 // FCVTAU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau S23, S7 // FCVTAU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau D1, D26 // FCVTAU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau V12.4H, V13.4H // FCVTAU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtau V21.8H, V0.8H // FCVTAU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtau V31.2S, V6.2S // FCVTAU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtau V29.4S, V26.4S // FCVTAU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtau V9.2D, V7.2D // FCVTAU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtl V30.4S, V4.4H // FCVTL <Vd>.4S, <Vn>.4H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtl V28.2D, V13.2S // FCVTL <Vd>.2D, <Vn>.2S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+ fcvtl2 V14.4S, V29.8H // FCVTL2 <Vd>.4S, <Vn>.8H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtl2 V0.2D, V9.4S // FCVTL2 <Vd>.2D, <Vn>.4S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+ fcvtms W15, H1 // FCVTMS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms X5, H2 // FCVTMS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms W1, S16 // FCVTMS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms X27, S22 // FCVTMS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms W18, D21 // FCVTMS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms X6, D26 // FCVTMS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtms H19, H29 // FCVTMS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtms S30, S14 // FCVTMS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtms D8, D20 // FCVTMS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtms V27.4H, V7.4H // FCVTMS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtms V26.8H, V11.8H // FCVTMS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtms V13.2S, V2.2S // FCVTMS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtms V18.4S, V21.4S // FCVTMS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtms V15.2D, V16.2D // FCVTMS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu W20, H6 // FCVTMU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu X7, H18 // FCVTMU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu W24, S19 // FCVTMU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu X7, S15 // FCVTMU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu W16, D16 // FCVTMU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu X1, D18 // FCVTMU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtmu H20, H13 // FCVTMU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu S28, S25 // FCVTMU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu D3, D27 // FCVTMU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu V18.4H, V2.4H // FCVTMU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtmu V10.8H, V11.8H // FCVTMU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtmu V27.2S, V14.2S // FCVTMU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtmu V31.4S, V4.4S // FCVTMU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtmu V6.2D, V26.2D // FCVTMU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtn V4.4H, V22.4S // FCVTN <Vd>.4H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtn V14.2S, V2.2D // FCVTN <Vd>.2S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtn2 V0.8H, V30.4S // FCVTN2 <Vd>.8H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtn2 V21.4S, V13.2D // FCVTN2 <Vd>.4S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtns W19, H15 // FCVTNS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns X20, H0 // FCVTNS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns W10, S5 // FCVTNS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns X14, S12 // FCVTNS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns W30, D2 // FCVTNS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns X0, D12 // FCVTNS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtns H16, H25 // FCVTNS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtns S23, S19 // FCVTNS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtns D30, D1 // FCVTNS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtns V28.4H, V19.4H // FCVTNS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtns V19.8H, V19.8H // FCVTNS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtns V20.2S, V4.2S // FCVTNS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtns V28.4S, V29.4S // FCVTNS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtns V21.2D, V31.2D // FCVTNS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu W12, H3 // FCVTNU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu X23, H27 // FCVTNU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu W4, S23 // FCVTNU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu X5, S28 // FCVTNU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu W4, D11 // FCVTNU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu X12, D8 // FCVTNU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtnu H24, H22 // FCVTNU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu S29, S22 // FCVTNU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu D18, D15 // FCVTNU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu V5.4H, V12.4H // FCVTNU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtnu V26.8H, V20.8H // FCVTNU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtnu V15.2S, V1.2S // FCVTNU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtnu V7.4S, V16.4S // FCVTNU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtnu V13.2D, V8.2D // FCVTNU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps W27, H14 // FCVTPS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps X26, H20 // FCVTPS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps W5, S27 // FCVTPS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps X29, S6 // FCVTPS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps W23, D25 // FCVTPS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps X10, D16 // FCVTPS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtps H31, H22 // FCVTPS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps S3, S3 // FCVTPS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps D10, D26 // FCVTPS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps V13.4H, V26.4H // FCVTPS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtps V26.8H, V10.8H // FCVTPS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtps V18.2S, V8.2S // FCVTPS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtps V12.4S, V18.4S // FCVTPS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtps V3.2D, V2.2D // FCVTPS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu W25, H22 // FCVTPU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu X4, H24 // FCVTPU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu W13, S0 // FCVTPU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu X0, S17 // FCVTPU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu W16, D25 // FCVTPU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu X15, D12 // FCVTPU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtpu H1, H29 // FCVTPU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu S21, S30 // FCVTPU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu D16, D26 // FCVTPU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu V2.4H, V25.4H // FCVTPU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtpu V24.8H, V26.8H // FCVTPU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtpu V6.2S, V23.2S // FCVTPU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtpu V10.4S, V6.4S // FCVTPU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtpu V7.2D, V23.2D // FCVTPU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtxn S29, D4 // FCVTXN <Vb><d>, <Va><n> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtxn V25.2S, V15.2D // FCVTXN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtxn2 V21.4S, V6.2D // FCVTXN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs W28, H26, #26 // FCVTZS <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X22, H17, #58 // FCVTZS <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W17, S23, #22 // FCVTZS <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X15, S30, #2 // FCVTZS <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W13, D17, #17 // FCVTZS <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X14, D9, #24 // FCVTZS <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W15, H10 // FCVTZS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X4, H21 // FCVTZS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W1, S4 // FCVTZS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X27, S27 // FCVTZS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs W24, D30 // FCVTZS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs X18, D21 // FCVTZS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs H29, H23, #16 // FCVTZS H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs S23, S15, #2 // FCVTZS S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs D20, D26, #57 // FCVTZS D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V20.4H, V24.4H, #11 // FCVTZS <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzs V18.8H, V10.8H, #7 // FCVTZS <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtzs V16.2S, V2.2S, #11 // FCVTZS <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V22.4S, V18.4S, #5 // FCVTZS <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzs V14.2D, V30.2D, #54 // FCVTZS <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs H16, H27 // FCVTZS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs S4, S5 // FCVTZS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs D4, D23 // FCVTZS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V8.4H, V16.4H // FCVTZS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzs V2.8H, V16.8H // FCVTZS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtzs V27.2S, V28.2S // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs V29.4S, V18.4S // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzs V13.2D, V31.2D // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzs Z1.H, P2/M, Z6.H // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+ fcvtzs Z19.S, P4/M, Z16.H // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+ fcvtzs Z14.D, P0/M, Z6.H // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+ fcvtzs Z25.S, P5/M, Z23.S // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+ fcvtzs Z3.D, P1/M, Z31.S // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+ fcvtzs Z28.S, P5/M, Z23.D // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs Z22.D, P6/M, Z29.D // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W12, H19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X17, H23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W16, S3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X27, S15, #8 // FCVTZU <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W21, D10, #23 // FCVTZU <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X26, D30, #27 // FCVTZU <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W26, H30 // FCVTZU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X9, H11 // FCVTZU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W20, S16 // FCVTZU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X7, S21 // FCVTZU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu W25, D30 // FCVTZU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu X13, D8 // FCVTZU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu H19, H8, #12 // FCVTZU H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu S25, S27, #10 // FCVTZU S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu D30, D16, #42 // FCVTZU D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V19.4H, V26.4H, #9 // FCVTZU <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzu V27.8H, V6.8H, #11 // FCVTZU <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtzu V30.2S, V4.2S, #19 // FCVTZU <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V31.4S, V6.4S, #22 // FCVTZU <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzu V10.2D, V12.2D, #53 // FCVTZU <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu H25, H30 // FCVTZU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu S2, S19 // FCVTZU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu D4, D7 // FCVTZU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V3.4H, V2.4H // FCVTZU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzu V30.8H, V25.8H // FCVTZU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ fcvtzu V25.2S, V25.2S // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu V21.4S, V2.4S // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ fcvtzu V23.2D, V15.2D // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ fcvtzu Z15.H, P0/M, Z8.H // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+ fcvtzu Z8.S, P5/M, Z18.H // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+ fcvtzu Z11.D, P4/M, Z24.H // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+ fcvtzu Z13.S, P7/M, Z8.S // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+ fcvtzu Z20.D, P2/M, Z13.S // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+ fcvtzu Z31.S, P3/M, Z20.D // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu Z4.D, P1/M, Z25.D // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fdiv H1, H26, H23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 1.0 V1UnitV02[2]
+ fdiv S31, S18, S12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
+ fdiv D6, D3, D0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
+ fdiv V21.4H, V15.4H, V22.4H // FDIV <Vd>.4H, <Vn>.4H, <Vm>.4H \\ ASIMD FP divide, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+ fdiv V31.8H, V12.8H, V15.8H // FDIV <Vd>.8H, <Vn>.8H, <Vm>.8H \\ ASIMD FP divide, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
+ fdiv V15.2S, V23.2S, V2.2S // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+ fdiv V7.4S, V27.4S, V22.4S // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
+ fdiv V31.2D, V25.2D, V8.2D // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.14 V1UnitV02[14]
+ fdiv Z21.H, P7/M, Z21.H, Z15.H // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
+ fdiv Z17.S, P4/M, Z17.S, Z20.S // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
+ fdiv Z13.D, P3/M, Z13.D, Z28.D // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
+ fdivr Z29.H, P4/M, Z29.H, Z1.H // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
+ fdivr Z13.S, P0/M, Z13.S, Z29.S // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
+ fdivr Z14.D, P3/M, Z14.D, Z31.D // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
+ fdup Z19.S, #0.5 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fexpa Z6.H, Z3.H // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ fmad Z9.S, P5/M, Z9.S, Z7.S // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmadd H27, H0, H6, H28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmadd S13, S24, S15, S5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmadd D19, D4, D2, D17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmax Z25.D, P2/M, Z25.D, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmax H8, H7, H11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax S9, S21, S2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax D4, D26, D26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax V0.4S, V13.4S, V21.4S // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax V12.4S, V27.4S, V11.4S // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmax Z16.S, P5/M, Z16.S, Z12.S // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnm Z25.D, P5/M, Z25.D, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnm H29, H13, H14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm S25, S20, S0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm D29, D25, D16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm V6.4S, V3.4S, V3.4S // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm V9.2D, V15.2D, V11.2D // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmaxnm Z6.S, P5/M, Z6.S, Z17.S // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnmp H25, V19.2H // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmp D17, V29.2D // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmp V31.4S, V4.4S, V2.4S // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmp V23.4S, V15.4S, V1.4S // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxnmv H0, V13.4H // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fmaxnmv H12, V11.8H // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+ fmaxnmv S28, V31.4S // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fmaxnmv H9, P3, Z2.H // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+ fmaxnmv S26, P6, Z0.S // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+ fmaxnmv D7, P1, Z29.D // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fmaxp H15, V25.2H // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxp S6, V2.2S // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxp V21.2S, V17.2S, V13.2S // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxp V10.4S, V5.4S, V25.4S // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fmaxv H23, V4.4H // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fmaxv H25, V15.8H // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+ fmaxv S23, V2.4S // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fmaxv H12, P0, Z22.H // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+ fmaxv S24, P5, Z12.S // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+ fmaxv D1, P6, Z25.D // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fmin Z24.D, P4/M, Z24.D, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmin H4, H13, H17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin S1, S14, S22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin D18, D19, D22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin V6.4S, V25.4S, V27.4S // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin V12.2S, V30.2S, V25.2S // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fmin Z11.H, P3/M, Z11.H, Z16.H // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnm Z19.H, P4/M, Z19.H, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnm H29, H23, H17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm S24, S14, S30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm D0, D26, D8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm V16.2S, V23.2S, V27.2S // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm V23.4S, V19.4S, V22.4S // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+ fminnm Z24.S, P3/M, Z24.S, Z13.S // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnmp H20, V14.2H // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmp D15, V8.2D // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmp V27.2D, V27.2D, V16.2D // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmp V2.4S, V14.4S, V14.4S // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminnmv H19, V25.4H // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fminnmv H23, V17.8H // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+ fminnmv S29, V17.4S // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fminnmv H24, P3, Z1.H // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+ fminnmv S30, P3, Z9.S // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+ fminnmv D18, P5, Z8.D // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fminp H7, V10.2H // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminp S17, V7.2S // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminp V25.4S, V2.4S, V15.4S // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminp V14.2S, V28.2S, V15.2S // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+ fminv H3, V30.4H // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fminv H29, V12.8H // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+ fminv S16, V19.4S // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+ fminv H15, P2, Z25.H // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+ fminv S4, P0, Z6.S // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+ fminv D20, P1, Z5.D // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fmla H23, H24, V15.H[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla S9, S20, V28.S[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla D12, D20, V7.D[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V29.8H, V15.8H, V10.H[4] // FMLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V2.2S, V16.2S, V28.S[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V14.4S, V14.4S, V5.S[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V10.2D, V14.2D, V21.D[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla Z2.H, Z4.H, Z7.H[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla Z22.S, Z15.S, Z1.S[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla Z1.D, Z30.D, Z11.D[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla V1.4S, V24.4S, V12.4S // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla V30.2D, V16.2D, V6.2D // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmla Z6.S, P1/M, Z24.S, Z24.S // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls H8, H14, V7.H[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls S20, S17, V5.S[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls D11, D24, V29.D[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V30.8H, V18.8H, V4.H[6] // FMLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V10.2S, V27.2S, V0.S[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V27.4S, V7.4S, V24.S[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V10.2D, V22.2D, V29.D[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls Z3.H, Z31.H, Z0.H[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls Z30.S, Z8.S, Z0.S[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls Z10.D, Z20.D, Z0.D[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls V6.2S, V3.2S, V12.2S // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls V6.8H, V15.8H, V23.8H // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmls Z26.S, P5/M, Z28.S, Z26.S // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmov W15, H31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov X21, H14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov H6, W5 // FMOV <Hd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov S22, W0 // FMOV <Sd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov W23, S30 // FMOV <Wd>, <Sn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov H16, X27 // FMOV <Hd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov D22, X12 // FMOV <Dd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+ fmov V7.D[1], X8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ fmov X26, D29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov X4, V26.D[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+ fmov Z2.S, P0/M, #0.5 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov Z14.S, #0.5 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov H18, H28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov S13, S23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov D27, D17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov H29, #0.5 // FMOV <Hd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov S22, #0.5 // FMOV <Sd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov D18, #0.5 // FMOV <Dd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov V12.2S, #0.5 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov V10.2S, #0.5 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov V0.2D, #0.5 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+ fmov Z2.D, P2/M, #0.0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov Z5.S, #0.0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmsb Z25.S, P5/M, Z25.S, Z29.S // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmsub H25, H28, H12, H24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmsub S31, S0, S23, S24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmsub D12, D10, D20, D16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fmul H18, H4, V7.H[3] // FMUL <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul S17, S23, V30.S[2] // FMUL S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul D27, D8, V10.D[1] // FMUL D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V10.4H, V2.4H, V7.H[5] // FMUL <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V5.2S, V12.2S, V9.S[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V15.4S, V30.4S, V2.S[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V11.2D, V31.2D, V24.D[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul Z17.H, P5/M, Z17.H, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z27.H, Z30.H, Z0.H[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z6.S, Z16.S, Z1.S[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z4.D, Z30.D, Z2.D[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul H28, H14, H3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul S28, S16, S24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul D19, D19, D0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V0.2D, V14.2D, V20.2D // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul V9.2D, V29.2D, V7.2D // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmul Z22.D, P1/M, Z22.D, Z3.D // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z19.S, Z14.S, Z26.S // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmulx H18, H17, V7.H[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx S23, S3, V3.S[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx D3, D13, V30.D[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V28.4H, V25.4H, V15.H[1] // FMULX <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V3.2S, V22.2S, V23.S[3] // FMULX <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V5.4S, V28.4S, V15.S[3] // FMULX <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V22.2D, V18.2D, V25.D[1] // FMULX <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx H20, H25, H0 // FMULX <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx D18, D19, D22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V22.2D, V18.2D, V4.2D // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx V16.2S, V4.2S, V27.2S // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+ fmulx Z7.H, P5/M, Z7.H, Z21.H // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fneg H2, H9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg S11, S19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg D5, D16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg V26.2D, V2.2D // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg V14.2S, V24.2S // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+ fneg Z16.S, P0/M, Z25.S // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fnmad Z6.H, P2/M, Z14.H, Z21.H // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmadd H3, H18, H31, H24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmadd S8, S18, S2, S14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmadd D19, D29, D28, D30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmla Z15.D, P0/M, Z8.D, Z29.D // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmls Z13.D, P0/M, Z8.D, Z12.D // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmsb Z30.D, P7/M, Z8.D, Z9.D // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmsub H3, H29, H24, H17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmsub S29, S26, S17, S4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmsub D7, D13, D13, D4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+ fnmul H3, H15, H7 // FNMUL <Hd>, <Hn>, <Hm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+ fnmul S16, S11, S2 // FNMUL <Sd>, <Sn>, <Sm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+ fnmul D12, D22, D14 // FNMUL <Dd>, <Dn>, <Dm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+ frecpe H20, H8 // FRECPE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe S27, S7 // FRECPE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe D2, D1 // FRECPE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe V28.4H, V27.4H // FRECPE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frecpe V9.8H, V6.8H // FRECPE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frecpe V25.2S, V28.2S // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frecpe V21.4S, V18.4S // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frecpe V10.2D, V26.2D // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frecpe Z14.H, Z0.H // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
+ frecpe Z5.S, Z16.S // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
+ frecpe Z27.D, Z11.D // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+ frecps H29, H19, H8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps D25, D17, D12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps V12.8H, V25.8H, V4.8H // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps V7.2D, V29.2D, V18.2D // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frecps Z11.S, Z31.S, Z1.S // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+ frecpx H18, H11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+ frecpx S13, S30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+ frecpx Z15.S, P4/M, Z12.S // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 1 3 3 1.0 V1UnitV0
+ frintn Z30.H, P3/M, Z31.H // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintn Z17.S, P4/M, Z23.S // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintn Z28.D, P1/M, Z25.D // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frinta Z10.H, P6/M, Z17.H // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frinta Z7.S, P4/M, Z27.S // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frinta Z17.D, P4/M, Z17.D // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintm Z26.H, P7/M, Z0.H // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintm Z6.S, P0/M, Z28.S // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintm Z29.D, P4/M, Z3.D // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintp Z20.H, P4/M, Z12.H // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintp Z3.S, P7/M, Z18.S // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintp Z28.D, P7/M, Z4.D // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintz Z27.H, P2/M, Z12.H // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintz Z12.S, P6/M, Z3.S // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintz Z12.D, P2/M, Z31.D // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frinti Z16.H, P4/M, Z9.H // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frinti Z18.S, P6/M, Z27.S // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frinti Z26.D, P2/M, Z12.D // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frintx Z17.H, P0/M, Z9.H // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+ frintx Z27.S, P7/M, Z16.S // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+ frintx Z21.D, P4/M, Z23.D // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frinta H22, H10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinta S15, S7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinta D30, D10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinta V24.4H, V10.4H // FRINTA <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frinta V5.8H, V3.8H // FRINTA <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frinta V23.2S, V22.2S // FRINTA <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frinta V28.4S, V28.4S // FRINTA <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frinta V3.2D, V13.2D // FRINTA <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frinti H31, H14 // FRINTI <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinti S23, S9 // FRINTI <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinti D8, D12 // FRINTI <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frinti V6.4H, V10.4H // FRINTI <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frinti V22.8H, V7.8H // FRINTI <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frinti V9.2S, V25.2S // FRINTI <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frinti V23.4S, V7.4S // FRINTI <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frinti V28.2D, V5.2D // FRINTI <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintm H0, H21 // FRINTM <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintm S22, S10 // FRINTM <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintm D5, D30 // FRINTM <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintm V3.4H, V8.4H // FRINTM <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintm V19.8H, V26.8H // FRINTM <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frintm V15.2S, V8.2S // FRINTM <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintm V20.4S, V26.4S // FRINTM <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintm V20.2D, V11.2D // FRINTM <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintn H12, H3 // FRINTN <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintn S27, S14 // FRINTN <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintn D30, D17 // FRINTN <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintn V27.4H, V4.4H // FRINTN <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintn V17.8H, V19.8H // FRINTN <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frintn V23.2S, V23.2S // FRINTN <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintn V2.4S, V4.4S // FRINTN <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintn V24.2D, V12.2D // FRINTN <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintp H17, H31 // FRINTP <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintp S14, S10 // FRINTP <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintp D25, D13 // FRINTP <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintp V22.4H, V25.4H // FRINTP <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintp V18.8H, V11.8H // FRINTP <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frintp V31.2S, V5.2S // FRINTP <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintp V0.4S, V24.4S // FRINTP <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintp V1.2D, V3.2D // FRINTP <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintx H4, H5 // FRINTX <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintx S10, S28 // FRINTX <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintx D17, D19 // FRINTX <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintx V24.4H, V25.4H // FRINTX <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintx V1.8H, V27.8H // FRINTX <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frintx V2.2S, V14.2S // FRINTX <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintx V27.4S, V31.4S // FRINTX <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintx V24.2D, V20.2D // FRINTX <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintz H10, H29 // FRINTZ <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintz S11, S23 // FRINTZ <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintz D6, D11 // FRINTZ <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+ frintz V13.4H, V5.4H // FRINTZ <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintz V20.8H, V21.8H // FRINTZ <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frintz V15.2S, V19.2S // FRINTZ <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frintz V11.4S, V18.4S // FRINTZ <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frintz V12.2D, V22.2D // FRINTZ <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ frsqrte H23, H26 // FRSQRTE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte S23, S5 // FRSQRTE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte D3, D11 // FRSQRTE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte V16.4H, V15.4H // FRSQRTE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frsqrte V14.8H, V0.8H // FRSQRTE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ frsqrte V6.2S, V8.2S // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+ frsqrte V30.4S, V21.4S // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frsqrte V15.2D, V14.2D // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ frsqrte Z6.H, Z30.H // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
+ frsqrte Z27.S, Z15.S // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
+ frsqrte Z6.D, Z17.D // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+ frsqrts H28, H26, H1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts S28, S1, S11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts V8.4H, V9.4H, V30.4H // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts V20.4S, V26.4S, V27.4S // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+ frsqrts Z10.H, Z25.H, Z22.H // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+ fscale Z2.H, P0/M, Z2.H, Z21.H // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fsqrt H13, H24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 1.0 V1UnitV02[2]
+ fsqrt S20, S15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
+ fsqrt D25, D21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
+ fsqrt V24.4H, V14.4H // FSQRT <Vd>.4H, <Vn>.4H \\ ASIMD FP square root, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+ fsqrt V12.8H, V3.8H // FSQRT <Vd>.8H, <Vn>.8H \\ ASIMD FP square root, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
+ fsqrt V30.2S, V20.2S // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+ fsqrt V2.4S, V24.4S // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
+ fsqrt V28.2D, V25.2D // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.13 V1UnitV02[15]
+ fsqrt Z13.H, P3/M, Z11.H // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 1 13 13 0.08 V1UnitV0[12]
+ fsqrt Z2.S, P7/M, Z0.S // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 1 10 10 0.11 V1UnitV0[9]
+ fsqrt Z17.D, P6/M, Z17.D // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root, F64 \\ 1 16 16 0.07 V1UnitV0[14]
+ fsub Z12.D, P6/M, Z12.D, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsub H20, H11, H18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub S15, S4, S24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub D25, D26, D4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub V13.8H, V15.8H, V17.8H // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub V1.2S, V31.2S, V27.2S // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+ fsub Z24.S, P4/M, Z24.S, Z10.S // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsub Z19.H, Z8.H, Z29.H // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsubr Z22.H, P7/M, Z22.H, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsubr Z13.S, P2/M, Z13.S, Z4.S // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ ftmad Z19.D, Z19.D, Z6.D, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ ftsmul Z21.S, Z0.S, Z10.S // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ ftssel Z5.D, Z0.D, Z15.D // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ hint #9 // HINT #<imm> \\ No description \\ No scheduling info
+ hlt #31335 // HLT #<imm> \\ No description \\ No scheduling info
+ hvc #60601 // HVC #<imm> \\ No description \\ No scheduling info
+ ic IALLUIS // IC <ic_op> \\ No description \\ No scheduling info
+ ic IVAU, X6 // IC <ic_op2>, <Xt> \\ No description \\ No scheduling info
+ incb X18 // INCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incb X17, VL3 // INCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incb X17, MUL3, MUL #7 // INCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd X19 // INCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd X17, VL3 // INCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd X11, VL64, MUL #7 // INCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ inch X24 // INCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ inch X23, ALL // INCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ inch X22, VL1, MUL #8 // INCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incw X29 // INCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incw X2, VL64 // INCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incw X2, VL8, MUL #1 // INCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ incd Z24.D // INCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incd Z23.D, VL8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incd Z20.D, VL2, MUL #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ inch Z29.H // INCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ inch Z28.H, VL16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ inch Z29.H, VL16, MUL #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incw Z17.S // INCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incw Z31.S, MUL3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incw Z12.S, VL4, MUL #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incp X7, P0.H // INCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ incp Z2.D, P6 // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ index Z8.B, #15, W14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z14.H, #11, W10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z17.S, #14, W21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z5.D, #11, X15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+ index Z16.B, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+ index Z13.H, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+ index Z20.S, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+ index Z13.D, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 1 5 5 0.5 V1UnitV0[2]
+ index Z28.B, W27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z13.H, W28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z22.S, W7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z0.D, X25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+ index Z6.B, W24, W8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z20.H, W4, W7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z10.S, W2, W19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+ index Z2.D, X23, X7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+ ins V15.B[7], V6.B[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V17.H[1], V3.H[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V4.S[1], V7.S[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V22.D[1], V25.D[1] // INS <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ ins V14.B[3], W12 // INS <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ ins V25.H[2], W14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ ins V14.S[1], W29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ ins V19.D[1], X27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ insr Z4.D, D0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+ insr Z4.D, X14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ isb // ISB \\ No description \\ No scheduling info
+ isb SY // ISB <option> \\ No description \\ No scheduling info
+ isb #1 // ISB #<imm> \\ No description \\ No scheduling info
+ lasta B3, P1, Z3.B // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+ lasta W16, P0, Z10.B // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ lastb D3, P1, Z17.D // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+ lastb X4, P3, Z31.D // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ ld1 { V23.8B }, [X11] // LD1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V25.8B }, [X30], #8 // LD1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V14.8B }, [X1], X26 // LD1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V12.16B }, [X19] // LD1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V24.16B }, [X28], #16 // LD1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V21.16B }, [X25], X28 // LD1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V8.4H }, [X30] // LD1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V4.4H }, [X10], #8 // LD1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V17.4H }, [X12], X16 // LD1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V24.8H }, [X27] // LD1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V21.8H }, [X24], #16 // LD1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V9.8H }, [X9], X27 // LD1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V4.2S }, [X2] // LD1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V19.2S }, [X27], #8 // LD1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V25.2S }, [X13], X19 // LD1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V3.4S }, [X4] // LD1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V24.4S }, [X20], #16 // LD1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V29.4S }, [X25], X23 // LD1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V24.1D }, [X9] // LD1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V23.1D }, [X3], #8 // LD1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V19.1D }, [X10], X19 // LD1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V3.2D }, [X28] // LD1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+ ld1 { V8.2D }, [X16], #16 // LD1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V5.2D }, [X1], X29 // LD1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ld1 { V24.8B, V25.8B }, [X6] // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V17.8B, V18.8B }, [X18], #16 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V18.8B, V19.8B }, [X6], X11 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V0.16B, V1.16B }, [X14] // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V20.16B, V21.16B }, [X2], #32 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V5.16B, V6.16B }, [X17], X25 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V25.4H, V26.4H }, [X3] // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V10.4H, V11.4H }, [X14], #16 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V0.4H, V1.4H }, [X24], X15 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V1.8H, V2.8H }, [X27] // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V22.8H, V23.8H }, [X13], #32 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V9.8H, V10.8H }, [X4], X13 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V6.2S, V7.2S }, [X29] // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V23.2S, V24.2S }, [X10], #16 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V26.2S, V27.2S }, [X21], X29 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V11.4S, V12.4S }, [X30] // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V23.4S, V24.4S }, [X14], #32 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V12.4S, V13.4S }, [X27], X22 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V27.1D, V28.1D }, [X7] // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V13.1D, V14.1D }, [X29], #16 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V1.1D, V2.1D }, [X7], X20 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V13.2D, V14.2D }, [X13] // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V13.2D, V14.2D }, [X10], #32 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V20.2D, V21.2D }, [X29], X28 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V7.8B, V8.8B, V9.8B }, [X12] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V13.8B, V14.8B, V15.8B }, [X10], #24 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V28.8B, V29.8B, V30.8B }, [X2], X21 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V19.16B, V20.16B, V21.16B }, [X10] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V8.16B, V9.16B, V10.16B }, [X29], #48 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V14.16B, V15.16B, V16.16B }, [X5], X17 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V10.4H, V11.4H, V12.4H }, [X28] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V22.4H, V23.4H, V24.4H }, [X6], #24 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V11.4H, V12.4H, V13.4H }, [X13], X23 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V21.8H, V22.8H, V23.8H }, [X22] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V26.8H, V27.8H, V28.8H }, [X2], #48 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V6.8H, V7.8H, V8.8H }, [X22], X6 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V16.2S, V17.2S, V18.2S }, [X27] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V3.2S, V4.2S, V5.2S }, [X30], #24 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V14.2S, V15.2S, V16.2S }, [X11], X28 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V0.4S, V1.4S, V2.4S }, [X24] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V17.4S, V18.4S, V19.4S }, [X28], #48 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V5.4S, V6.4S, V7.4S }, [X20], X13 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V14.1D, V15.1D, V16.1D }, [X3] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V21.1D, V22.1D, V23.1D }, [X24], #24 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V25.1D, V26.1D, V27.1D }, [X18], X14 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V12.2D, V13.2D, V14.2D }, [X15] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+ ld1 { V13.2D, V14.2D, V15.2D }, [X4], #48 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V15.2D, V16.2D, V17.2D }, [X10], X6 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+ ld1 { V4.8B, V5.8B, V6.8B, V7.8B }, [X13] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V8.8B, V9.8B, V10.8B, V11.8B }, [X30], #32 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V4.8B, V5.8B, V6.8B, V7.8B }, [X20], X3 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V13.16B, V14.16B, V15.16B, V16.16B }, [X9] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+ ld1 { V3.16B, V4.16B, V5.16B, V6.16B }, [X17], #64 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V10.16B, V11.16B, V12.16B, V13.16B }, [X19], X29 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V20.4H, V21.4H, V22.4H, V23.4H }, [X15] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V4.4H, V5.4H, V6.4H, V7.4H }, [X12], #32 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V24.4H, V25.4H, V26.4H, V27.4H }, [X25], X0 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V0.8H, V1.8H, V2.8H, V3.8H }, [X21] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+ ld1 { V12.8H, V13.8H, V14.8H, V15.8H }, [X21], #64 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V14.8H, V15.8H, V16.8H, V17.8H }, [X12], X23 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V21.2S, V22.2S, V23.2S, V24.2S }, [X21] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V27.2S, V28.2S, V29.2S, V30.2S }, [X11], #32 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V24.2S, V25.2S, V26.2S, V27.2S }, [X1], X22 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V15.4S, V16.4S, V17.4S, V18.4S }, [X28] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+ ld1 { V14.4S, V15.4S, V16.4S, V17.4S }, [X8], #64 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V11.4S, V12.4S, V13.4S, V14.4S }, [X2], X28 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V22.1D, V23.1D, V24.1D, V25.1D }, [X4] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+ ld1 { V3.1D, V4.1D, V5.1D, V6.1D }, [X23], #32 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V22.1D, V23.1D, V24.1D, V25.1D }, [X9], X22 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ld1 { V18.2D, V19.2D, V20.2D, V21.2D }, [X6] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+ ld1 { V3.2D, V4.2D, V5.2D, V6.2D }, [X3], #64 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V6.2D, V7.2D, V8.2D, V9.2D }, [X17], X18 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+ ld1 { V18.B }[3], [X23] // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V18.H }[3], [X1] // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V8.S }[0], [X24] // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V11.D }[0], [X13] // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1 { V23.B }[1], [X13], #1 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V10.B }[9], [X25], X14 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V6.H }[2], [X26], #2 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V30.H }[6], [X27], X3 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V5.S }[1], [X10], #4 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V13.S }[3], [X6], X24 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V26.D }[1], [X28], #8 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1 { V1.D }[1], [X20], X30 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1b { Z20.B }, P1/Z, [X25] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z10.B }, P1/Z, [X16, #-1, MUL VL] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z31.H }, P1/Z, [X4] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z5.H }, P5/Z, [X8, #6, MUL VL] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z1.S }, P3/Z, [X12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z24.S }, P2/Z, [X28, #1, MUL VL] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z25.D }, P5/Z, [X2] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z0.D }, P6/Z, [X22, #5, MUL VL] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z7.B }, P0/Z, [X24, X11] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z26.H }, P5/Z, [X5, X21] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z22.S }, P3/Z, [X16, X12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z7.D }, P5/Z, [X18, X12] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1b { Z2.D }, P0/Z, [X15, Z18.D, UXTW] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1b { Z20.S }, P6/Z, [X2, Z0.S, SXTW] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1b { Z15.D }, P4/Z, [X23, Z9.D] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1b { Z8.S }, P4/Z, [Z25.S, #22] // LD1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1b { Z13.D }, P2/Z, [Z3.D, #30] // LD1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1d { Z21.D }, P1/Z, [X24, Z31.D, SXTW #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1d { Z7.D }, P0/Z, [X13, Z15.D, SXTW] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1d { Z14.D }, P1/Z, [X26, Z27.D, LSL #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1d { Z30.D }, P7/Z, [X14, Z16.D] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1d { Z22.D }, P1/Z, [Z15.D] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1d { Z8.D }, P4/Z, [Z12.D, #200] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1h { Z3.H }, P2/Z, [X21] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z15.H }, P0/Z, [X25, #-3, MUL VL] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z9.S }, P1/Z, [X17] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z1.S }, P3/Z, [X14, #5, MUL VL] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z10.D }, P3/Z, [X9] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z2.D }, P7/Z, [X1, #4, MUL VL] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z26.H }, P5/Z, [X10, X19, LSL #1] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z29.S }, P7/Z, [X23, X11, LSL #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z2.D }, P5/Z, [X30, X9, LSL #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1h { Z14.S }, P7/Z, [X14, Z28.S, SXTW #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1h { Z28.D }, P7/Z, [X8, Z9.D, SXTW #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1h { Z16.D }, P5/Z, [X7, Z9.D, UXTW] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1h { Z27.S }, P4/Z, [X4, Z7.S, UXTW] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1h { Z6.D }, P7/Z, [X30, Z26.D, LSL #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1h { Z11.D }, P2/Z, [X20, Z25.D] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1h { Z6.S }, P7/Z, [Z31.S] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1h { Z1.S }, P3/Z, [Z12.S, #8] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1h { Z7.D }, P7/Z, [Z9.D] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1h { Z13.D }, P3/Z, [Z5.D, #8] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1r { V8.8B }, [X23] // LD1R { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V4.8B }, [X25], #1 // LD1R { <Vt>.8B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V14.8B }, [X24], X14 // LD1R { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V8.16B }, [X24] // LD1R { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V21.16B }, [X30], #1 // LD1R { <Vt>.16B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V1.16B }, [X3], X9 // LD1R { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V28.4H }, [X9] // LD1R { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V10.4H }, [X27], #2 // LD1R { <Vt>.4H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V12.4H }, [X8], X20 // LD1R { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V3.8H }, [X16] // LD1R { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V27.8H }, [X18], #2 // LD1R { <Vt>.8H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V20.8H }, [X20], X4 // LD1R { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V10.2S }, [X20] // LD1R { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V28.2S }, [X8], #4 // LD1R { <Vt>.2S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V4.2S }, [X0], X12 // LD1R { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V11.4S }, [X3] // LD1R { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V18.4S }, [X3], #4 // LD1R { <Vt>.4S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V2.4S }, [X4], X1 // LD1R { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V3.1D }, [X15] // LD1R { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V16.1D }, [X2], #8 // LD1R { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V24.1D }, [X21], X3 // LD1R { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V18.2D }, [X0] // LD1R { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld1r { V8.2D }, [X18], #8 // LD1R { <Vt>.2D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1r { V8.2D }, [X16], X28 // LD1R { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld1rb { Z13.B }, P0/Z, [X9] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z30.B }, P6/Z, [X21, #28] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z10.H }, P1/Z, [X9] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z25.H }, P3/Z, [X26, #6] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z24.S }, P2/Z, [X19] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z16.S }, P1/Z, [X8, #54] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z17.D }, P7/Z, [X4] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rb { Z4.D }, P7/Z, [X20, #18] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rd { Z12.D }, P7/Z, [X20] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rd { Z19.D }, P5/Z, [X13, #384] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z13.H }, P7/Z, [X0] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z23.H }, P0/Z, [X18, #56] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z24.S }, P6/Z, [X27] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z6.S }, P7/Z, [X1, #84] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z3.D }, P4/Z, [X25] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rh { Z25.D }, P5/Z, [X5, #108] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqb { Z31.B }, P1/Z, [X6] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqb { Z21.B }, P7/Z, [X29, #112] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqb { Z7.B }, P6/Z, [X26, X26] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1rqd { Z10.D }, P0/Z, [X28] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqd { Z29.D }, P5/Z, [X6, #-16] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqd { Z5.D }, P6/Z, [X7, X8, LSL #3] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1rqh { Z29.H }, P3/Z, [X3] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqh { Z29.H }, P4/Z, [X30, #112] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqh { Z9.H }, P0/Z, [X23, X11, LSL #1] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load broadcast, scalar + scalar + S \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1rqw { Z11.S }, P0/Z, [X26] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqw { Z7.S }, P3/Z, [X16, #-80] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rqw { Z2.S }, P0/Z, [X21, X23, LSL #2] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z6.H }, P6/Z, [X23] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z28.H }, P3/Z, [X21, #43] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z13.S }, P5/Z, [X14] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z26.S }, P3/Z, [X15, #4] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z23.D }, P2/Z, [X21] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsb { Z29.D }, P6/Z, [X14, #25] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z25.S }, P2/Z, [X4] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z30.S }, P5/Z, [X6, #124] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z24.D }, P4/Z, [X6] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsh { Z14.D }, P3/Z, [X20, #98] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsw { Z2.D }, P0/Z, [X23] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rsw { Z18.D }, P7/Z, [X11, #0] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z12.S }, P7/Z, [X9] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z25.S }, P7/Z, [X17, #60] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z22.D }, P5/Z, [X1] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1rw { Z2.D }, P3/Z, [X3, #36] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z28.H }, P6/Z, [X9] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z22.H }, P2/Z, [X19, #7, MUL VL] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z22.S }, P3/Z, [X23] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z2.S }, P6/Z, [X22, #-2, MUL VL] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z31.D }, P6/Z, [X10] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z23.D }, P5/Z, [X2, #-4, MUL VL] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z3.H }, P5/Z, [X10, X23] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z16.S }, P7/Z, [X27, X16] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z13.D }, P7/Z, [X28, X18] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sb { Z30.D }, P6/Z, [X22, Z27.D, UXTW] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sb { Z23.S }, P5/Z, [X17, Z10.S, UXTW] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sb { Z23.D }, P2/Z, [X28, Z10.D] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sb { Z14.S }, P4/Z, [Z18.S, #24] // LD1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sb { Z5.D }, P0/Z, [Z25.D, #31] // LD1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sh { Z8.S }, P3/Z, [X21] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z29.S }, P4/Z, [X11, #-4, MUL VL] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z13.D }, P6/Z, [X18] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z19.D }, P2/Z, [X29, #-3, MUL VL] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sh { Z28.S }, P0/Z, [X6, X28, LSL #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1sh { Z26.D }, P0/Z, [X7, X12, LSL #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ld1sh { Z22.S }, P3/Z, [X7, Z1.S, UXTW #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sh { Z3.D }, P6/Z, [X11, Z14.D, SXTW #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sh { Z27.D }, P3/Z, [X19, Z23.D, SXTW] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sh { Z12.S }, P5/Z, [X27, Z13.S, SXTW] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sh { Z9.D }, P0/Z, [X22, Z8.D, LSL #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sh { Z22.D }, P0/Z, [X27, Z12.D] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sh { Z1.S }, P2/Z, [Z9.S, #44] // LD1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sh { Z11.D }, P5/Z, [Z30.D, #34] // LD1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sw { Z7.D }, P1/Z, [X19] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sw { Z28.D }, P1/Z, [X26, #4, MUL VL] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ld1sw { Z26.D }, P4/Z, [X20, X17, LSL #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+ ld1sw { Z22.D }, P1/Z, [X14, Z23.D, SXTW #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sw { Z4.D }, P3/Z, [X20, Z15.D, SXTW] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sw { Z1.D }, P4/Z, [X20, Z23.D, LSL #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1sw { Z2.D }, P7/Z, [X4, Z0.D] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sw { Z12.D }, P7/Z, [Z21.D] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1sw { Z27.D }, P3/Z, [Z10.D, #24] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1w { Z9.S }, P0/Z, [X18, Z9.S, SXTW #2] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1w { Z14.D }, P5/Z, [X26, Z2.D, UXTW #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1w { Z31.D }, P6/Z, [X17, Z2.D, UXTW] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1w { Z14.S }, P2/Z, [X18, Z28.S, SXTW] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1w { Z13.D }, P3/Z, [X5, Z11.D, LSL #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1w { Z24.D }, P3/Z, [X2, Z17.D] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1w { Z4.S }, P0/Z, [Z1.S] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1w { Z17.S }, P6/Z, [Z26.S, #60] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ld1w { Z31.D }, P7/Z, [Z22.D] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld1w { Z2.D }, P3/Z, [Z6.D, #116] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld2 { V13.8B, V14.8B }, [X4] // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld2 { V20.8B, V21.8B }, [X11], #16 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld2 { V13.8B, V14.8B }, [X4], X7 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld2 { V26.16B, V27.16B }, [X16] // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V15.16B, V16.16B }, [X3], #32 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V24.16B, V25.16B }, [X7], X30 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V0.4H, V1.4H }, [X21] // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld2 { V5.4H, V6.4H }, [X30], #16 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld2 { V5.4H, V6.4H }, [X22], X1 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld2 { V8.8H, V9.8H }, [X28] // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V14.8H, V15.8H }, [X19], #32 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V28.8H, V29.8H }, [X26], X7 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V2.2S, V3.2S }, [X16] // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+ ld2 { V23.2S, V24.2S }, [X5], #16 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld2 { V22.2S, V23.2S }, [X11], X12 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+ ld2 { V22.4S, V23.4S }, [X4] // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V27.4S, V28.4S }, [X18], #32 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V22.4S, V23.4S }, [X26], X29 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V22.2D, V23.2D }, [X17] // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V12.2D, V13.2D }, [X19], #32 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V6.2D, V7.2D }, [X11], X24 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V29.B, V30.B }[3], [X1] // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V23.H, V24.H }[7], [X14] // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V26.S, V27.S }[1], [X17] // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V1.D, V2.D }[0], [X10] // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2 { V20.B, V21.B }[9], [X24], #2 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V29.B, V30.B }[6], [X18], X19 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V2.H, V3.H }[3], [X12], #4 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V11.H, V12.H }[3], [X18], X17 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V15.S, V16.S }[1], [X7], #8 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V29.S, V30.S }[1], [X12], X0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V1.D, V2.D }[1], [X3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2 { V10.D, V11.D }[1], [X18], X27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2b { Z9.B, Z10.B }, P2/Z, [X22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2b { Z28.B, Z29.B }, P3/Z, [X22, #4, MUL VL] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2b { Z26.B, Z27.B }, P1/Z, [X3, X12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2d { Z12.D, Z13.D }, P5/Z, [X24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2d { Z22.D, Z23.D }, P2/Z, [X21, #-2, MUL VL] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2d { Z22.D, Z23.D }, P6/Z, [X14, X4, LSL #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2h { Z5.H, Z6.H }, P5/Z, [X20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2h { Z27.H, Z28.H }, P7/Z, [X11, #14, MUL VL] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2h { Z18.H, Z19.H }, P3/Z, [X9, X17, LSL #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 10 10 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitS[2]
+ ld2r { V10.8B, V11.8B }, [X20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V18.8B, V19.8B }, [X11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V28.8B, V29.8B }, [X30], X14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V10.16B, V11.16B }, [X23] // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V24.16B, V25.16B }, [X1], #2 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V20.16B, V21.16B }, [X11], X7 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V25.4H, V26.4H }, [X11] // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V28.4H, V29.4H }, [X18], #4 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V21.4H, V22.4H }, [X2], X17 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V23.8H, V24.8H }, [X10] // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V19.8H, V20.8H }, [X29], #4 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V13.8H, V14.8H }, [X13], X5 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V25.2S, V26.2S }, [X19] // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V5.2S, V6.2S }, [X28], #8 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V4.2S, V5.2S }, [X14], X19 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V8.4S, V9.4S }, [X17] // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V22.4S, V23.4S }, [X5], #8 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V29.4S, V30.4S }, [X4], X18 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V9.1D, V10.1D }, [X25] // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V15.1D, V16.1D }, [X26], #16 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V10.1D, V11.1D }, [X28], X26 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V26.2D, V27.2D }, [X8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+ ld2r { V14.2D, V15.2D }, [X3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2r { V24.2D, V25.2D }, [X6], X14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+ ld2w { Z21.S, Z22.S }, P4/Z, [X12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2w { Z29.S, Z30.S }, P2/Z, [X19, #6, MUL VL] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2w { Z18.S, Z19.S }, P6/Z, [X22, X22, LSL #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld3 { V8.8B, V9.8B, V10.8B }, [X0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V6.8B, V7.8B, V8.8B }, [X26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V20.8B, V21.8B, V22.8B }, [X25], X24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V15.16B, V16.16B, V17.16B }, [X5] // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V19.16B, V20.16B, V21.16B }, [X3], #48 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V26.16B, V27.16B, V28.16B }, [X8], X29 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V15.4H, V16.4H, V17.4H }, [X8] // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V4.4H, V5.4H, V6.4H }, [X5], #24 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V24.4H, V25.4H, V26.4H }, [X25], X0 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V7.8H, V8.8H, V9.8H }, [X21] // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V4.8H, V5.8H, V6.8H }, [X26], #48 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V12.8H, V13.8H, V14.8H }, [X0], X25 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V16.2S, V17.2S, V18.2S }, [X0] // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V9.2S, V10.2S, V11.2S }, [X1], #24 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V27.2S, V28.2S, V29.2S }, [X23], X4 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V12.4S, V13.4S, V14.4S }, [X25] // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V12.4S, V13.4S, V14.4S }, [X27], #48 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V2.4S, V3.4S, V4.4S }, [X22], X21 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V10.2D, V11.2D, V12.2D }, [X18] // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V25.2D, V26.2D, V27.2D }, [X4], #48 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V6.2D, V7.2D, V8.2D }, [X10], X24 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V17.B, V18.B, V19.B }[2], [X27] // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V18.H, V19.H, V20.H }[5], [X16] // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V1.S, V2.S, V3.S }[3], [X14] // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V5.D, V6.D, V7.D }[1], [X14] // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3 { V16.B, V17.B, V18.B }[3], [X15], #3 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V14.B, V15.B, V16.B }[4], [X23], X6 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V11.H, V12.H, V13.H }[1], [X28], #6 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V4.H, V5.H, V6.H }[2], [X5], X15 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V26.S, V27.S, V28.S }[0], [X14], #12 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V1.S, V2.S, V3.S }[0], [X26], X20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V14.D, V15.D, V16.D }[1], [X30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3 { V23.D, V24.D, V25.D }[0], [X24], X14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3b { Z29.B, Z30.B, Z31.B }, P3/Z, [X17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3b { Z23.B, Z24.B, Z25.B }, P7/Z, [X12, #18, MUL VL] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3b { Z23.B, Z24.B, Z25.B }, P3/Z, [X12, X12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+ ld3d { Z20.D, Z21.D, Z22.D }, P2/Z, [X6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3d { Z1.D, Z2.D, Z3.D }, P2/Z, [X9, #-15, MUL VL] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3d { Z13.D, Z14.D, Z15.D }, P6/Z, [X27, X30, LSL #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+ ld3h { Z26.H, Z27.H, Z28.H }, P1/Z, [X29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3h { Z14.H, Z15.H, Z16.H }, P3/Z, [X18, #9, MUL VL] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3h { Z5.H, Z6.H, Z7.H }, P3/Z, [X6, X21, LSL #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+ ld3r { V24.8B, V25.8B, V26.8B }, [X10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V14.8B, V15.8B, V16.8B }, [X11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V22.8B, V23.8B, V24.8B }, [X0], X11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V17.16B, V18.16B, V19.16B }, [X3] // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V7.16B, V8.16B, V9.16B }, [X29], #3 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V3.16B, V4.16B, V5.16B }, [X20], X5 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V3.4H, V4.4H, V5.4H }, [X1] // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V8.4H, V9.4H, V10.4H }, [X3], #6 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V4.4H, V5.4H, V6.4H }, [X0], X28 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V6.8H, V7.8H, V8.8H }, [X28] // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V4.8H, V5.8H, V6.8H }, [X11], #6 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V3.8H, V4.8H, V5.8H }, [X17], X0 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V18.2S, V19.2S, V20.2S }, [X24] // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V8.2S, V9.2S, V10.2S }, [X22], #12 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V12.2S, V13.2S, V14.2S }, [X0], X14 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V28.4S, V29.4S, V30.4S }, [X2] // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V21.4S, V22.4S, V23.4S }, [X22], #12 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V28.4S, V29.4S, V30.4S }, [X13], X25 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V1.1D, V2.1D, V3.1D }, [X28] // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V0.1D, V1.1D, V2.1D }, [X7], #24 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V22.1D, V23.1D, V24.1D }, [X9], X15 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V8.2D, V9.2D, V10.2D }, [X3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld3r { V3.2D, V4.2D, V5.2D }, [X25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3r { V8.2D, V9.2D, V10.2D }, [X18], X13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld3w { Z23.S, Z24.S, Z25.S }, P1/Z, [X8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3w { Z6.S, Z7.S, Z8.S }, P4/Z, [X0, #18, MUL VL] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+ ld3w { Z27.S, Z28.S, Z29.S }, P3/Z, [X3, X6, LSL #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+ ld4 { V6.8B, V7.8B, V8.8B, V9.8B }, [X27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V20.8B, V21.8B, V22.8B, V23.8B }, [X10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V18.8B, V19.8B, V20.8B, V21.8B }, [X24], X11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V11.16B, V12.16B, V13.16B, V14.16B }, [X5] // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld4 { V10.16B, V11.16B, V12.16B, V13.16B }, [X12], #64 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V12.16B, V13.16B, V14.16B, V15.16B }, [X4], X17 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V21.4H, V22.4H, V23.4H, V24.4H }, [X14] // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V10.4H, V11.4H, V12.4H, V13.4H }, [X19], #32 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V5.4H, V6.4H, V7.4H, V8.4H }, [X15], X17 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V9.8H, V10.8H, V11.8H, V12.8H }, [X1] // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld4 { V2.8H, V3.8H, V4.8H, V5.8H }, [X0], #64 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V4.8H, V5.8H, V6.8H, V7.8H }, [X17], X17 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V23.2S, V24.2S, V25.2S, V26.2S }, [X24] // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V25.2S, V26.2S, V27.2S, V28.2S }, [X3], #32 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V22.2S, V23.2S, V24.2S, V25.2S }, [X14], X15 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V17.4S, V18.4S, V19.4S, V20.4S }, [X4] // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld4 { V25.4S, V26.4S, V27.4S, V28.4S }, [X19], #64 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V4.4S, V5.4S, V6.4S, V7.4S }, [X28], X3 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V2.2D, V3.2D, V4.2D, V5.2D }, [X24] // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, D \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ld4 { V18.2D, V19.2D, V20.2D, V21.2D }, [X0], #64 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V27.2D, V28.2D, V29.2D, V30.2D }, [X27], X4 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+ ld4 { V4.B, V5.B, V6.B, V7.B }[12], [X27] // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V5.H, V6.H, V7.H, V8.H }[0], [X4] // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V0.S, V1.S, V2.S, V3.S }[0], [X26] // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V2.D, V3.D, V4.D, V5.D }[0], [X29] // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4 { V26.B, V27.B, V28.B, V29.B }[4], [X13], #4 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V10.B, V11.B, V12.B, V13.B }[11], [X24], X21 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V8.H, V9.H, V10.H, V11.H }[0], [X17], #8 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V21.H, V22.H, V23.H, V24.H }[2], [X21], X24 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V1.S, V2.S, V3.S, V4.S }[1], [X28], #16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V20.S, V21.S, V22.S, V23.S }[1], [X27], X16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V18.D, V19.D, V20.D, V21.D }[1], [X26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4 { V8.D, V9.D, V10.D, V11.D }[0], [X23], X0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4b { Z16.B, Z17.B, Z18.B, Z19.B }, P3/Z, [X23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P5/Z, [X3, #12, MUL VL] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P4/Z, [X20, X12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+ ld4d { Z26.D, Z27.D, Z28.D, Z29.D }, P7/Z, [X10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4d { Z27.D, Z28.D, Z29.D, Z30.D }, P0/Z, [X6, #24, MUL VL] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4d { Z7.D, Z8.D, Z9.D, Z10.D }, P4/Z, [X25, X8, LSL #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+ ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P4/Z, [X19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P1/Z, [X16, #-8, MUL VL] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4h { Z10.H, Z11.H, Z12.H, Z13.H }, P2/Z, [X8, X28, LSL #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+ ld4r { V20.8B, V21.8B, V22.8B, V23.8B }, [X23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V24.8B, V25.8B, V26.8B, V27.8B }, [X15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V4.8B, V5.8B, V6.8B, V7.8B }, [X26], X6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V1.16B, V2.16B, V3.16B, V4.16B }, [X25] // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V1.16B, V2.16B, V3.16B, V4.16B }, [X14], #4 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V0.16B, V1.16B, V2.16B, V3.16B }, [X29], X11 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V16.4H, V17.4H, V18.4H, V19.4H }, [X6] // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V14.4H, V15.4H, V16.4H, V17.4H }, [X0], #8 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V21.4H, V22.4H, V23.4H, V24.4H }, [X25], X22 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V4.8H, V5.8H, V6.8H, V7.8H }, [X23] // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V25.8H, V26.8H, V27.8H, V28.8H }, [X7], #8 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V13.8H, V14.8H, V15.8H, V16.8H }, [X19], X27 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V0.2S, V1.2S, V2.2S, V3.2S }, [X30] // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V23.2S, V24.2S, V25.2S, V26.2S }, [X29], #16 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V19.2S, V20.2S, V21.2S, V22.2S }, [X9], X0 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V7.4S, V8.4S, V9.4S, V10.4S }, [X23] // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V9.4S, V10.4S, V11.4S, V12.4S }, [X3], #16 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V3.4S, V4.4S, V5.4S, V6.4S }, [X10], X22 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V7.1D, V8.1D, V9.1D, V10.1D }, [X26] // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V11.1D, V12.1D, V13.1D, V14.1D }, [X5], #32 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V12.1D, V13.1D, V14.1D, V15.1D }, [X30], X17 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V7.2D, V8.2D, V9.2D, V10.2D }, [X8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+ ld4r { V12.2D, V13.2D, V14.2D, V15.2D }, [X2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4r { V17.2D, V18.2D, V19.2D, V20.2D }, [X21], X13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+ ld4w { Z18.S, Z19.S, Z20.S, Z21.S }, P6/Z, [X4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4w { Z21.S, Z22.S, Z23.S, Z24.S }, P5/Z, [X16, #-8, MUL VL] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+ ld4w { Z25.S, Z26.S, Z27.S, Z28.S }, P2/Z, [X23, X8, LSL #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+ ldapur W7, [X24] // LDAPUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapur W25, [X29, #68] // LDAPUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapur X20, [X13] // LDAPUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapur X29, [X4, #-199] // LDAPUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapurb W13, [X17] // LDAPURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapurb W20, [X19, #124] // LDAPURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapurh W3, [X22] // LDAPURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapurh W1, [X6, #113] // LDAPURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursb W7, [X8] // LDAPURSB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursb W29, [X22, #-76] // LDAPURSB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursb X29, [X7] // LDAPURSB <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursb X6, [X0, #-254] // LDAPURSB <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursh W17, [X19] // LDAPURSH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursh W26, [X18, #-114] // LDAPURSH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursh X3, [X3] // LDAPURSH <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursh X13, [X25, #30] // LDAPURSH <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldapursw X3, [X18] // LDAPURSW <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldapursw X21, [X25, #0] // LDAPURSW <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ ldar W9, [X20] // LDAR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldar W15, [X0, #0] // LDAR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldar X5, [X25] // LDAR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldar X11, [X2, #0] // LDAR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldarb W16, [X21] // LDARB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldarb W14, [X30, #0] // LDARB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldarh W26, [X25] // LDARH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldarh W21, [X2, #0] // LDARH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxp W13, W22, [X28] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxp W11, W19, [X20, #0] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxp X25, X8, [X16] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxp X28, X17, [X25, #0] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxr W4, [X5] // LDAXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxr W10, [X7, #0] // LDAXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxr X22, [X21] // LDAXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxr X7, [X1, #0] // LDAXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxrb W12, [X30] // LDAXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxrb W27, [X2, #0] // LDAXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldaxrh W30, [X16] // LDAXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldaxrh W14, [X3, #0] // LDAXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldff1b { Z10.B }, P3/Z, [X10] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z2.B }, P5/Z, [X28, X2] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z2.H }, P0/Z, [X14] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z30.H }, P3/Z, [X25, X18] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z17.S }, P5/Z, [X24] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z17.S }, P7/Z, [X11, X15] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z9.D }, P2/Z, [X3] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z5.D }, P2/Z, [X6, X8] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1b { Z7.D }, P3/Z, [X27, Z19.D, SXTW] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1b { Z13.S }, P3/Z, [X24, Z25.S, SXTW] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1b { Z27.D }, P0/Z, [X13, Z16.D] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1b { Z7.S }, P7/Z, [Z16.S] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1b { Z11.S }, P5/Z, [Z8.S, #25] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1b { Z2.D }, P7/Z, [Z19.D] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1b { Z3.D }, P5/Z, [Z0.D, #11] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1d { Z21.D }, P2/Z, [X20] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1d { Z9.D }, P3/Z, [X28, X30, LSL #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1d { Z21.D }, P4/Z, [X11, Z12.D, SXTW #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1d { Z6.D }, P4/Z, [X15, Z1.D, UXTW] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1d { Z12.D }, P7/Z, [X11, Z28.D, LSL #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1d { Z26.D }, P4/Z, [X30, Z5.D] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1d { Z10.D }, P5/Z, [Z10.D] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1d { Z21.D }, P6/Z, [Z3.D, #48] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1h { Z14.H }, P3/Z, [X22] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z15.H }, P2/Z, [X24, X8, LSL #1] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z23.S }, P0/Z, [X12] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z18.S }, P0/Z, [X7, X25, LSL #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z16.D }, P0/Z, [X11] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z25.D }, P3/Z, [X24, X19, LSL #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1h { Z9.S }, P2/Z, [X3, Z24.S, SXTW #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1h { Z7.D }, P0/Z, [X8, Z17.D, UXTW #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1h { Z9.D }, P5/Z, [X4, Z10.D, SXTW] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1h { Z4.S }, P4/Z, [X6, Z27.S, UXTW] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1h { Z25.D }, P1/Z, [X29, Z6.D, LSL #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1h { Z10.D }, P7/Z, [X1, Z26.D] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1h { Z4.S }, P1/Z, [Z27.S] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1h { Z5.S }, P3/Z, [Z8.S, #62] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1h { Z16.D }, P5/Z, [Z10.D] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1h { Z15.D }, P2/Z, [Z19.D, #34] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sb { Z0.H }, P2/Z, [X2] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z29.H }, P1/Z, [X16, X21] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z20.S }, P7/Z, [X8] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z8.S }, P2/Z, [X4, X14] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z11.D }, P4/Z, [X6] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z17.D }, P4/Z, [X16, X10] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sb { Z13.D }, P2/Z, [X28, Z8.D, SXTW] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sb { Z3.S }, P2/Z, [X26, Z24.S, SXTW] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sb { Z10.D }, P7/Z, [X20, Z6.D] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sb { Z18.S }, P3/Z, [Z9.S] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sb { Z25.S }, P2/Z, [Z29.S, #25] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sb { Z8.D }, P0/Z, [Z24.D] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sb { Z7.D }, P0/Z, [Z4.D, #9] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sh { Z2.S }, P2/Z, [X6] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z9.S }, P3/Z, [X30, X16, LSL #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z7.D }, P4/Z, [X30] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z1.D }, P0/Z, [X29, X0, LSL #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldff1sh { Z25.S }, P4/Z, [X5, Z9.S, SXTW #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sh { Z17.D }, P3/Z, [X0, Z25.D, SXTW #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sh { Z12.D }, P7/Z, [X5, Z15.D, SXTW] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sh { Z8.S }, P5/Z, [X3, Z21.S, UXTW] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sh { Z14.D }, P6/Z, [X17, Z27.D, LSL #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sh { Z23.D }, P4/Z, [X22, Z0.D] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sh { Z6.S }, P4/Z, [Z6.S] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sh { Z3.S }, P7/Z, [Z26.S, #16] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sh { Z25.D }, P3/Z, [Z17.D] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sh { Z2.D }, P3/Z, [Z31.D, #26] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sw { Z16.D }, P2/Z, [X8] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sw { Z27.D }, P1/Z, [X6, X11, LSL #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1sw { Z27.D }, P3/Z, [X5, Z20.D, UXTW #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sw { Z15.D }, P1/Z, [X13, Z26.D, SXTW] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sw { Z24.D }, P2/Z, [X7, Z23.D, LSL #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1sw { Z8.D }, P3/Z, [X5, Z22.D] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sw { Z16.D }, P6/Z, [Z12.D] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1sw { Z3.D }, P1/Z, [Z13.D, #60] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1w { Z2.S }, P5/Z, [X13] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z9.S }, P3/Z, [X16, X19, LSL #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z31.D }, P6/Z, [X3] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z30.D }, P4/Z, [X25, X12, LSL #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldff1w { Z27.S }, P6/Z, [X10, Z17.S, UXTW #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1w { Z8.D }, P4/Z, [X28, Z31.D, SXTW #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1w { Z1.D }, P0/Z, [X23, Z14.D, UXTW] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1w { Z17.S }, P5/Z, [X8, Z6.S, UXTW] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1w { Z19.D }, P3/Z, [X7, Z18.D, LSL #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1w { Z23.D }, P2/Z, [X16, Z4.D] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1w { Z24.S }, P6/Z, [Z24.S] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1w { Z20.S }, P0/Z, [Z6.S, #36] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+ ldff1w { Z21.D }, P5/Z, [Z12.D] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldff1w { Z29.D }, P2/Z, [Z11.D, #40] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+ ldnf1b { Z17.B }, P5/Z, [X20] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z8.B }, P5/Z, [X26, #1, MUL VL] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z4.H }, P3/Z, [X25] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z31.H }, P3/Z, [X7, #0, MUL VL] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z2.S }, P7/Z, [X25] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z17.S }, P5/Z, [X29, #2, MUL VL] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z6.D }, P5/Z, [X26] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1b { Z18.D }, P4/Z, [X20, #5, MUL VL] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1d { Z5.D }, P6/Z, [X6] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1d { Z19.D }, P0/Z, [X15, #-1, MUL VL] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z7.H }, P5/Z, [X22] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z27.H }, P1/Z, [X2, #6, MUL VL] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z18.S }, P2/Z, [X13] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z8.S }, P2/Z, [X29, #-8, MUL VL] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z26.D }, P5/Z, [X5] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1h { Z20.D }, P0/Z, [X29, #-6, MUL VL] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z17.H }, P0/Z, [X23] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z14.H }, P0/Z, [X18, #-5, MUL VL] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z23.S }, P0/Z, [X3] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z13.S }, P7/Z, [X15, #-8, MUL VL] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z14.D }, P4/Z, [X7] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sb { Z13.D }, P7/Z, [X25, #6, MUL VL] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z28.S }, P4/Z, [X9] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z3.S }, P1/Z, [X14, #-2, MUL VL] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z1.D }, P2/Z, [X0] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sh { Z14.D }, P3/Z, [X8, #3, MUL VL] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sw { Z8.D }, P4/Z, [X9] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1sw { Z28.D }, P4/Z, [X13, #-7, MUL VL] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z15.S }, P5/Z, [X27] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z28.S }, P0/Z, [X28, #-1, MUL VL] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z28.D }, P5/Z, [X13] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnf1w { Z4.D }, P0/Z, [X12, #2, MUL VL] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnp S1, S13, [X4] // LDNP <St1>, <St2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp S30, S5, [X11, #-184] // LDNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp D3, D12, [X21] // LDNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp D12, D5, [X7, #-424] // LDNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldnp Q0, Q14, [X24] // LDNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Load vector pair, immed offset, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ldnp Q4, Q1, [X27, #80] // LDNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ldnp W4, W20, [X25] // LDNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldnp W30, W4, [X21, #-196] // LDNP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldnp X7, X30, [X18] // LDNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+ ldnp X5, X19, [X1, #-240] // LDNP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+ ldnt1b { Z9.B }, P2/Z, [X21] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1b { Z30.B }, P5/Z, [X30, #-3, MUL VL] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1b { Z10.B }, P5/Z, [X12, X17] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldnt1d { Z27.D }, P2/Z, [X12] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1d { Z5.D }, P7/Z, [X22, #6, MUL VL] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1d { Z28.D }, P2/Z, [X14, X0, LSL #3] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldnt1h { Z11.H }, P0/Z, [X21] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1h { Z19.H }, P1/Z, [X24, #-5, MUL VL] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1h { Z27.H }, P0/Z, [X22, X24, LSL #1] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+ ldnt1w { Z27.S }, P4/Z, [X19] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1w { Z15.S }, P0/Z, [X22, #3, MUL VL] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+ ldnt1w { Z25.S }, P4/Z, [X12, X21, LSL #2] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+ ldp S19, S15, [X24], #-64 // LDP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp D9, D1, [X20], #296 // LDP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp Q18, Q24, [X11], #144 // LDP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Load vector pair, immed post-index, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ldp S10, S30, [X0, #-4]! // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp D26, D11, [X16, #-304]! // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+ ldp Q18, Q12, [X25, #960]! // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Load vector pair, immed pre-index, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+ ldp S12, S31, [X20, #-192] // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldp D26, D6, [X22, #-144] // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldp Q5, Q19, [X9, #-448] // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+ ldp W10, W18, [X16], #-96 // LDP <Wt1>, <Wt2>, [<Xn|SP>], #<imm32> \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldp X13, X16, [X11], #288 // LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm64> \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 2 4 4 1.0 V1UnitL[3],V1UnitI
+ ldp W7, W16, [X13, #-116]! // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>]! \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldp X26, X3, [X14, #16]! // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>]! \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 2 4 4 1.0 V1UnitL[3],V1UnitI
+ ldp W25, W23, [X22] // LDP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldp W3, W21, [X17, #40] // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+ ldp X6, X25, [X17] // LDP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+ ldp X9, X21, [X3, #104] // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+ ldpsw X23, X26, [X30], #-160 // LDPSW <Xt1>, <Xt2>, [<Xn|SP>], #<imm> \\ Load pair, immed post-index or immed pre-index, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+ ldpsw X19, X28, [X21, #-248]! // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! \\ Load pair, immed post-index or immed pre-index, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+ ldpsw X13, X20, [X15] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+ ldpsw X9, X27, [X8, #80] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+ ldr W13, [X2], #-22 // LDR <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr X6, [X9], #248 // LDR <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr W20, [X10, #13]! // LDR <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr X23, [X20, #-24]! // LDR <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldr W19, [X15, #11620] // LDR <Wt>, [<Xn|SP>, #<pimm32>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldr X2, [X13, #18528] // LDR <Xt>, [<Xn|SP>, #<pimm64>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldr B0, [X15], #-18 // LDR <Bt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr H25, [X4], #-156 // LDR <Ht>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr S28, [X6], #162 // LDR <St>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr D23, [X8], #-176 // LDR <Dt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr Q5, [X18], #70 // LDR <Qt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr B9, [X0, #-104]! // LDR <Bt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr H24, [X10, #34]! // LDR <Ht>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr S29, [X5, #168]! // LDR <St>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr D22, [X9, #-1]! // LDR <Dt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr Q27, [X20, #-204]! // LDR <Qt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+ ldr B23, [X0, #349] // LDR <Bt>, [<Xn|SP>, #<pimmb>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr H1, [X15, #3540] // LDR <Ht>, [<Xn|SP>, #<pimmh>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr S14, [X7, #16208] // LDR <St>, [<Xn|SP>, #<pimms>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr D4, [X17, #7368] // LDR <Dt>, [<Xn|SP>, #<pimmd>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr Q14, [X6, #4624] // LDR <Qt>, [<Xn|SP>, #<pimmq>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+ ldr W15, test // LDR <Wt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ ldr X26, test // LDR <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ ldr S17, test // LDR <St>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+ ldr D10, test // LDR <Dt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+ ldr Q22, test // LDR <Qt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+ ldr P0, [X28] // LDR <Pt>, [<Xn|SP>] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+ ldr P1, [X6, #-53, MUL VL] // LDR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+ ldr W30, [X10, X0] // LDR <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldr X13, [X4, X21] // LDR <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldr W25, [X18, W26, UXTW] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr X20, [X29, W26, UXTW] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr W26, [X12, W0, UXTW #2] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X13, [X2, W10, UXTW #3] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr W13, [X18, W19, SXTW] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr X5, [X26, W12, SXTW] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr W16, [X9, W24, SXTW #2] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X21, [X29, W4, SXTW #3] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr W19, [X15, X1, SXTX] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr X25, [X4, X20, SXTX] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldr W3, [X1, X17, SXTX #2] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X2, [X13, X26, SXTX #3] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr W1, [X18, X17, LSL #2] // LDR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr X22, [X17, X3, LSL #3] // LDR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldr B8, [X30, X10] // LDR <Bt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr B25, [X21, W8, UXTW] // LDR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr B7, [X9, W29, SXTW] // LDR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr B31, [X17, X6, SXTX] // LDR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr H11, [X13, X9] // LDR <Ht>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr H6, [X4, W4, UXTW] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr H28, [X3, W28, SXTW] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr H3, [X15, X19, SXTX] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr H24, [X27, W5, UXTW #1] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr H22, [X28, W11, SXTW #1] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr H3, [X18, X26, SXTX #1] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr H8, [X23, X19, LSL #1] // LDR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr S21, [X1, X29] // LDR <St>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr S12, [X30, W5, UXTW] // LDR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr S15, [X2, W20, SXTW] // LDR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr S11, [X25, X20, SXTX] // LDR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr S9, [X24, W27, UXTW #2] // LDR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr S7, [X2, W5, SXTW #2] // LDR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr S13, [X19, X28, SXTX #2] // LDR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr S21, [X10, X4, LSL #2] // LDR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D10, [X23, X10] // LDR <Dt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr D24, [X26, W7, UXTW] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr D28, [X12, W2, SXTW] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr D0, [X7, X29, SXTX] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr D24, [X9, W27, UXTW #3] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D5, [X17, W2, SXTW #3] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D2, [X5, X16, SXTX #3] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr D2, [X29, X18, LSL #3] // LDR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+ ldr Q9, [X13, X16] // LDR <Qt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+ ldr Q16, [X16, W1, UXTW] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr Q1, [X17, W5, SXTW] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr Q1, [X8, X9, SXTX] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+ ldr Q23, [X26, W23, UXTW #4] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Q3, [X18, W23, SXTW #4] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Q2, [X28, X30, SXTX #4] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Q21, [X23, X27, LSL #4] // LDR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+ ldr Z26, [X4] // LDR <Zt>, [<Xn|SP>] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+ ldr Z18, [X27, #16, MUL VL] // LDR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+ ldrb W4, [X17], #0 // LDRB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrb W27, [X23, #114]! // LDRB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrb W26, [X19] // LDRB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrb W29, [X18, #3179] // LDRB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrb W16, [X25, X9] // LDRB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrb W9, [X15, W19, UXTW] // LDRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrb W25, [X7, W0, SXTW] // LDRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrb W0, [X18, X21, SXTX] // LDRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W9, [X1], #-2 // LDRH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrh W12, [X29, #-41]! // LDRH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrh W28, [X3] // LDRH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrh W27, [X19, #3156] // LDRH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrh W20, [X25, X15] // LDRH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrh W22, [X0, W24, UXTW] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W6, [X17, W18, SXTW] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W21, [X13, X30, SXTX] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrh W14, [X21, W21, UXTW #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrh W0, [X29, W13, SXTW #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrh W11, [X20, X0, SXTX #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrh W12, [X17, X27, LSL #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsb W12, [X13], #-250 // LDRSB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb X10, [X2], #-229 // LDRSB <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb W5, [X2, #-169]! // LDRSB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb X28, [X12, #-46]! // LDRSB <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsb W5, [X26] // LDRSB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb W24, [X0, #3862] // LDRSB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb X6, [X0] // LDRSB <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb X20, [X0, #653] // LDRSB <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsb W30, [X22, X21] // LDRSB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsb W24, [X2, W14, UXTW] // LDRSB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb W7, [X1, W8, SXTW] // LDRSB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb W4, [X8, X25, SXTX] // LDRSB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb X12, [X28, X27] // LDRSB <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsb X10, [X5, W9, UXTW] // LDRSB <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb X19, [X23, W24, SXTW] // LDRSB <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsb X20, [X10, X13, SXTX] // LDRSB <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W5, [X0], #-115 // LDRSH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh X30, [X18], #-50 // LDRSH <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh W27, [X15, #-45]! // LDRSH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh X14, [X24, #27]! // LDRSH <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsh W18, [X13] // LDRSH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh W11, [X27, #4094] // LDRSH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh X19, [X26] // LDRSH <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh X19, [X9, #6652] // LDRSH <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsh W18, [X30, X24] // LDRSH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsh W13, [X25, W7, UXTW] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W3, [X16, W28, SXTW] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W0, [X13, X14, SXTX] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh W0, [X5, W21, UXTW #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh W26, [X6, W29, SXTW #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh W22, [X26, X15, SXTX #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh W26, [X20, X21, LSL #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh X4, [X9, X24] // LDRSH <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsh X25, [X8, W13, UXTW] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh X25, [X20, W10, SXTW] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh X6, [X13, X10, SXTX] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsh X15, [X0, W28, UXTW #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh X19, [X9, W15, SXTW #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh X1, [X17, X26, SXTX #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsh X7, [X29, X17, LSL #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+ ldrsw X4, [X21], #-93 // LDRSW <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsw X6, [X28, #96]! // LDRSW <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+ ldrsw X1, [X23] // LDRSW <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsw X6, [X19, #4552] // LDRSW <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ ldrsw X20, test // LDRSW <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ ldrsw X21, [X25, X7] // LDRSW <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ ldrsw X12, [X28, W12, UXTW] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsw X22, [X26, W21, SXTW] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsw X0, [X21, X19, SXTX] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ ldrsw X23, [X17, W19, UXTW #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldrsw X23, [X30, W11, SXTW #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldrsw X29, [X12, X5, SXTX #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldrsw X3, [X1, X17, LSL #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ ldtr W12, [X9] // LDTR <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtr W9, [X3, #-55] // LDTR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtr X9, [X9] // LDTR <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtr X25, [X1, #103] // LDTR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrb W27, [X7] // LDTRB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrb W8, [X1, #-90] // LDTRB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrh W13, [X21] // LDTRH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrh W10, [X15, #-67] // LDTRH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb W15, [X19] // LDTRSB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb W28, [X19, #-202] // LDTRSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb X17, [X6] // LDTRSB <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsb X0, [X11, #180] // LDTRSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh W19, [X26] // LDTRSH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh W16, [X28, #-233] // LDTRSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh X26, [X22] // LDTRSH <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsh X27, [X19, #-76] // LDTRSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsw X23, [X28] // LDTRSW <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldtrsw X26, [X21, #45] // LDTRSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+ ldur B24, [X3] // LDUR <Bt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur B9, [X25, #240] // LDUR <Bt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur H29, [X21] // LDUR <Ht>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur H5, [X23, #-5] // LDUR <Ht>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur S12, [X14] // LDUR <St>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur S22, [X10, #108] // LDUR <St>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur D16, [X14] // LDUR <Dt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur D22, [X24, #-198] // LDUR <Dt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur Q25, [X9] // LDUR <Qt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur Q5, [X24, #233] // LDUR <Qt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+ ldur W19, [X30] // LDUR <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldur W24, [X12, #202] // LDUR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldur X0, [X3] // LDUR <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldur X14, [X14, #17] // LDUR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurb W9, [X24] // LDURB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurb W12, [X5, #92] // LDURB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurh W27, [X14] // LDURH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldurh W13, [X30, #-173] // LDURH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb W5, [X8] // LDURSB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb W21, [X10, #172] // LDURSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb X19, [X15] // LDURSB <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursb X16, [X11, #-173] // LDURSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh W21, [X12] // LDURSH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh W16, [X18, #203] // LDURSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh X4, [X28] // LDURSH <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursh X5, [X3, #-133] // LDURSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursw X21, [X7] // LDURSW <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldursw X11, [X16, #169] // LDURSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ ldxp W23, W14, [X17] // LDXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxp W2, W8, [X21, #0] // LDXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxp X5, X6, [X30] // LDXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxp X10, X26, [X6, #0] // LDXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxr W4, [X9] // LDXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxr W7, [X3, #0] // LDXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxr X6, [X27] // LDXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxr X3, [X4, #0] // LDXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxrb W17, [X21] // LDXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxrb W14, [X3, #0] // LDXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ ldxrh W14, [X1] // LDXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ ldxrh W24, [X11, #0] // LDXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ lsl W25, W0, #22 // LSL <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsl X27, X7, #56 // LSL <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsl Z1.B, P1/M, Z1.B, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z7.H, P3/M, Z7.H, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z10.S, P3/M, Z10.S, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z21.D, P7/M, Z21.D, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z13.B, Z4.B, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z11.H, Z16.H, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z16.S, Z11.S, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z18.D, Z4.D, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl W4, W9, W12 // LSL <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsl X7, X29, X22 // LSL <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsl Z3.D, P2/M, Z3.D, Z15.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z3.S, P6/M, Z3.S, Z8.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z19.S, Z25.S, Z25.D // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lslr Z3.H, P5/M, Z3.H, Z23.H // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lslv W6, W8, W2 // LSLV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ lslv X7, X26, X21 // LSLV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ lsr W0, W0, #30 // LSR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsr X23, X24, #23 // LSR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ lsr Z21.B, P5/M, Z21.B, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z1.H, P4/M, Z1.H, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z24.S, P7/M, Z24.S, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z13.D, P3/M, Z13.D, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z3.B, Z11.B, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z5.H, Z12.H, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z21.S, Z16.S, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z21.D, Z15.D, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr W17, W20, W15 // LSR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsr X24, X4, X20 // LSR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ lsr Z30.D, P3/M, Z30.D, Z28.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z18.H, P3/M, Z18.H, Z29.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z7.H, Z30.H, Z11.D // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsrr Z14.B, P1/M, Z14.B, Z16.B // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsrv W0, W28, W19 // LSRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ lsrv X16, X22, X19 // LSRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ mad Z17.B, P7/M, Z4.B, Z5.B // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mad Z29.H, P4/M, Z31.H, Z18.H // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mad Z7.S, P4/M, Z5.S, Z29.S // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mad Z28.D, P7/M, Z18.D, Z2.D // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ madd W15, W9, W9, W29 // MADD <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, W-form \\ 1 2 1 1.0 V1UnitM0
+ madd X29, X22, X21, X21 // MADD <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+ mla V15.8H, V22.8H, V4.H[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mla V28.2S, V10.2S, V2.S[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mla V31.4S, V18.4S, V27.4S // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mla Z1.B, P0/M, Z3.B, Z3.B // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mla Z21.H, P2/M, Z31.H, Z30.H // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mla Z24.S, P3/M, Z11.S, Z9.S // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mla Z2.D, P0/M, Z12.D, Z5.D // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mls V25.8H, V29.8H, V0.H[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mls V22.2S, V29.2S, V0.S[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mls V26.4S, V5.4S, V28.4S // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+ mls Z11.B, P1/M, Z28.B, Z6.B // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mls Z31.H, P0/M, Z25.H, Z24.H // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mls Z1.S, P5/M, Z7.S, Z13.S // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ mls Z2.D, P1/M, Z17.D, Z10.D // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mneg W14, W30, W30 // MNEG <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mneg X21, X3, X9 // MNEG <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mov Z9.S, P2/M, S10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z17.B, Z29.B[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z26.H, Z7.H[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z14.S, Z21.S[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z22.D, Z14.D[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z21.S, S25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov WSP, #0xe00 // MOV <Wd|WSP>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov X3, #0x1e00 // MOV <Xd|SP>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov V30.B[12], V17.B[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V10.H[3], V17.H[5] // MOV <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V19.S[2], V2.S[1] // MOV <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V21.D[1], V16.D[0] // MOV <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+ mov V5.B[12], W23 // MOV <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov V27.H[6], W6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov V21.S[0], W21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov V13.D[0], X10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+ mov Z30.B, P7/M, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z30.D, P7/M, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z10.H, P5/M, #72, LSL #0 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z19.B, P6/Z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z6.D, P1/Z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z12.D, P7/Z, #40, LSL #8 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+ mov Z30.B, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z2.H, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z20.H, #82, LSL #8 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov W24, #0xe00 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov X15, #0xe00 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov P0.B, P0/M, P6.B // MOV <Pd>.B, <Pg>/M, <Pn>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+ mov P3.B, P7/Z, P2.B // MOV <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ mov W21, W11 // MOV <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov X14, X0 // MOV <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov B15, V21.B[8] // MOV B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov H13, V17.H[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov S7, V11.S[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov D27, V24.D[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+ mov Z12.D, P5/M, X24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ mov Z31.D, P6/M, SP // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ mov Z19.B, W27 // MOV <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ mov Z17.H, WSP // MOV <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+ mov W13, V12.S[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ mov X30, V18.D[0] // MOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ mov WSP, WSP // MOV <Wd|WSP>, <Wn|WSP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov X1, X11 // MOV <Xd|SP>, <Xn|SP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ mov V12.16B, V6.16B // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ mov Z1.D, P3/M, Z6.D // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+ mov Z24.D, Z25.D // MOV <Zd>.D, <Zn>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ mov W30, #0xe00 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov X4, #0xe00 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mov Z14.B, #0x70 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z8.H, #0x60 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z2.S, #0x2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z6.D, #0x4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov P2.B, P5.B // MOV <Pd>.B, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ movi V7.16B, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V14.8H, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V13.4H, #74, LSL #8 // MOVI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V19.2S, #226 // MOVI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V0.2S, #137, LSL #24 // MOVI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V1.4S, #122, MSL #8 // MOVI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi D16, #0 // MOVI <Dd>, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movi V13.2D, #0xff00ff00ff00ff00 // MOVI <Vd>.2D, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ movk W8, #57951 // MOVK <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movk W6, #34540, LSL #0 // MOVK <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movk X1, #56641 // MOVK <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movk X23, #3111, LSL #48 // MOVK <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn W16, #52526 // MOVN <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn W27, #47742, LSL #0 // MOVN <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn X10, #63431 // MOVN <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movn X0, #58015, LSL #48 // MOVN <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movprfx Z22.B, P0/M, Z4.B // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+ mla Z22.B, P0/M, Z19.B, Z25.B // Ignore
+ movprfx Z3, Z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+ fmla Z3.D, P0/M, Z8.D, Z19.D // Ignore
+ movs P0.B, P7/Z, P3.B // MOVS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ movs P4.B, P0.B // MOVS <Pd>.B, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ movz W3, #9629 // MOVZ <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movz W23, #10835, LSL #16 // MOVZ <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movz X0, #22630 // MOVZ <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ movz X11, #20464, LSL #48 // MOVZ <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+ mrs X4, ACTLR_EL1 // MRS <Xt>, <systemreg> \\ No description \\ No scheduling info
+ mrs X14, S2_4_C0_C5_4 // MRS <Xt>, S<op0>_<op1>_<Cn>_<Cm>_<op2> \\ No description \\ No scheduling info
+ msb Z18.B, P1/M, Z27.B, Z0.B // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ msb Z27.H, P5/M, Z23.H, Z1.H // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ msb Z26.S, P2/M, Z0.S, Z2.S // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+ msb Z1.D, P6/M, Z12.D, Z12.D // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ msr DAIFSet, #0 // MSR <pstatefield1>, #<imm1> \\ No description \\ No scheduling info
+ msr SPSel, #0 // MSR <pstatefield2>, #<imm2> \\ No description \\ No scheduling info
+ msr ACTLR_EL3, X18 // MSR <systemreg>, <Xt> \\ No description \\ No scheduling info
+ msr S3_6_C8_C12_1, X23 // MSR S<op0>_<op1>_<Cn>_<Cm>_<op2>, <Xt> \\ No description \\ No scheduling info
+ msub W6, W26, W13, W13 // MSUB <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, W-form \\ 1 2 1 1.0 V1UnitM0
+ msub X14, X28, X9, X3 // MSUB <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+ mul V26.4H, V20.4H, V14.H[5] // MUL <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul V5.8H, V21.8H, V3.H[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul V29.2S, V10.2S, V3.S[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul V30.4S, V11.4S, V4.S[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul Z16.B, Z16.B, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z9.H, Z9.H, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z23.S, Z23.S, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z15.D, Z15.D, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ mul V3.8H, V9.8H, V8.8H // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ mul Z17.B, P6/M, Z17.B, Z9.B // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z18.H, P7/M, Z18.H, Z15.H // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z29.S, P6/M, Z29.S, Z8.S // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ mul Z25.D, P1/M, Z25.D, Z25.D // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ mul W8, W13, W20 // MUL <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mul X12, X8, X25 // MUL <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+ mvn W0, W18 // MVN <Wd>, <Wm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn W25, W27, ASR #6 // MVN <Wd>, <Wm>, <shift> #<wamount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn X1, X21 // MVN <Xd>, <Xm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn X9, X23, ASR #39 // MVN <Xd>, <Xm>, <shift> #<amount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+ mvn V16.16B, V24.16B // MVN <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ mvni V9.4H, #237 // MVNI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V8.8H, #171, LSL #8 // MVNI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V7.2S, #81 // MVNI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V22.4S, #15, LSL #8 // MVNI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ mvni V12.4S, #141, MSL #8 // MVNI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+ nand P5.B, P4/Z, P5.B, P5.B // NAND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ nands P6.B, P3/Z, P4.B, P5.B // NANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ neg W25, W20, LSL #4 // NEG <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ neg W0, W29, LSL #9 // NEG <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ neg W7, W28, ASR #24 // NEG <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ neg X29, X11, LSL #3 // NEG <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ neg X24, X10, LSL #54 // NEG <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ neg X0, X16, LSR #2 // NEG <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ neg D18, D20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ neg V16.2D, V14.2D // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ neg Z16.B, P2/M, Z15.B // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ negs W30, W22, LSL #2 // NEGS <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ negs W8, W8, LSL #15 // NEGS <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ negs W12, W21, ASR #15 // NEGS <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ negs X24, X23, LSL #1 // NEGS <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ negs X20, X13, LSL #20 // NEGS <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ negs X1, X22, LSR #30 // NEGS <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ ngc W11, W9 // NGC <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ ngc X30, X4 // NGC <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ ngcs W13, W22 // NGCS <Wd>, <Wm> \\ No description \\ No scheduling info
+ ngcs X15, X1 // NGCS <Xd>, <Xm> \\ No description \\ No scheduling info
+ nop // NOP \\ No description \\ No scheduling info
+ nor P4.B, P4/Z, P0.B, P4.B // NOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ nors P1.B, P0/Z, P7.B, P6.B // NORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ not P7.B, P2/Z, P6.B // NOT <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ not Z29.S, P4/M, Z9.S // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ not V15.8B, V29.8B // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ nots P7.B, P3/Z, P1.B // NOTS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ orn Z5.B, Z5.B, #0x70 // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z14.H, Z14.H, #0x60 // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z14.S, Z14.S, #0x2 // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z27.D, Z27.D, #0x4 // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn P1.B, P2/Z, P3.B, P5.B // ORN <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ orn W2, W27, W7 // ORN <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn W6, W28, W14, LSL #19 // ORN <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn X22, X12, X3 // ORN <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn X19, X17, X0, LSL #58 // ORN <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orn V29.8B, V19.8B, V16.8B // ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orns P3.B, P3/Z, P0.B, P3.B // ORNS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ orr WSP, W27, #0xe00 // ORR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ orr X27, X6, #0x1e00 // ORR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ orr Z4.B, Z4.B, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z26.H, Z26.H, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z3.S, Z3.S, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z30.D, Z30.D, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr P6.B, P4/Z, P4.B, P3.B // ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+ orr W14, W1, W23 // ORR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr W25, W22, W0, ASR #20 // ORR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr X11, X6, X13 // ORR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr X26, X26, X7, LSL #62 // ORR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+ orr V9.4H, #18 // ORR <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V20.8H, #175, LSL #0 // ORR <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V4.4S, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V17.4S, #119, LSL #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr V12.16B, V9.16B, V1.16B // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+ orr Z28.H, P3/M, Z28.H, Z7.H // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z8.D, Z14.D, Z19.D // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orrs P7.B, P7/Z, P6.B, P5.B // ORRS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+ orv D19, P6, Z31.D // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+ pfalse P6.B // PFALSE <Pd>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ pfirst P0.B, P5, P0.B // PFIRST <Pdn>.B, <Pg>, <Pdn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ pmul V30.8B, V0.8B, V27.8B // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
+ pmul V7.16B, V20.16B, V18.16B // PMUL <Vd>.16B, <Vn>.16B, <Vm>.16B \\ ASIMD multiply/multiply long (8x8) polynomial, Q-form \\ 1 3 3 2.0 V1UnitV01
+ pnext P5.S, P5, P5.S // PNEXT <Pdn>.<T>, <Pv>, <Pdn>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ prfb #14, P5, [X21] // PRFB #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfb #14, P3, [X28, #-24, MUL VL] // PRFB #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfb PSTL1STRM, P7, [X5] // PRFB <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfb PLDL2KEEP, P1, [X12, #11, MUL VL] // PRFB <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfb PLDL1KEEP, P7, [X4, X9] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Xm>] \\ No description \\ No scheduling info
+ prfb PLDL3STRM, P4, [X3, Z15.S, UXTW] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ No description \\ No scheduling info
+ prfb PLDL1STRM, P7, [X28, Z4.D, UXTW] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ No description \\ No scheduling info
+ prfb PSTL3KEEP, P2, [X18, Z19.D] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] \\ No description \\ No scheduling info
+ prfb #12, P1, [Z28.S] // PRFB #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfb #8, P0, [Z22.S, #21] // PRFB #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfb PSTL1STRM, P2, [Z25.S] // PRFB <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfb PSTL2STRM, P1, [Z31.S, #18] // PRFB <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfb #11, P5, [Z25.D] // PRFB #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfb #2, P2, [Z4.D, #10] // PRFB #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfb PSTL2KEEP, P5, [Z5.D] // PRFB <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfb PLDL1KEEP, P1, [Z31.D, #17] // PRFB <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfd #13, P3, [X21] // PRFD #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfd #4, P5, [X3, #-7, MUL VL] // PRFD #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfd PSTL3KEEP, P0, [X29] // PRFD <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfd PLDL1STRM, P3, [X15, #-16, MUL VL] // PRFD <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfd PSTL2KEEP, P3, [X24, X24, LSL #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ No description \\ No scheduling info
+ prfd PSTL1STRM, P3, [X27, Z27.S, SXTW #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3] \\ No description \\ No scheduling info
+ prfd PSTL1KEEP, P0, [X21, Z2.D, UXTW #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ No description \\ No scheduling info
+ prfd PLDL1STRM, P7, [X22, Z22.D, LSL #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ No description \\ No scheduling info
+ prfd #3, P1, [Z2.S] // PRFD #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfd #8, P7, [Z10.S, #72] // PRFD #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfd PSTL1KEEP, P3, [Z19.S] // PRFD <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfd PSTL2STRM, P4, [Z26.S, #248] // PRFD <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfd #15, P1, [Z17.D] // PRFD #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfd #3, P0, [Z6.D, #24] // PRFD #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfd PSTL1KEEP, P3, [Z31.D] // PRFD <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfd PSTL1STRM, P7, [Z10.D, #40] // PRFD <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfh #3, P3, [X17] // PRFH #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfh #6, P3, [X6, #19, MUL VL] // PRFH #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfh PLDL3KEEP, P6, [X2] // PRFH <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfh PLDL2KEEP, P6, [X18, #-4, MUL VL] // PRFH <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfh PSTL2KEEP, P1, [X28, X9, LSL #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ No description \\ No scheduling info
+ prfh PLDL1STRM, P6, [X0, Z10.S, UXTW #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ No description \\ No scheduling info
+ prfh PLDL3KEEP, P7, [X24, Z21.D, UXTW #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ No description \\ No scheduling info
+ prfh PSTL1STRM, P5, [X10, Z6.D, LSL #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ No description \\ No scheduling info
+ prfh PLDL3STRM, P6, [Z0.S] // PRFH <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfh PSTL3STRM, P0, [Z30.S, #12] // PRFH <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfh PSTL2KEEP, P2, [Z21.D] // PRFH <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfh PSTL2KEEP, P1, [Z8.D, #14] // PRFH <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfm PLDL1STRM, [X5] // PRFM <prfop>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL3KEEP, [X19, #10160] // PRFM <prfop>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm #25, [X28] // PRFM #<imm5>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm #7, [X15, #6776] // PRFM #<imm5>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL3STRM, test // PRFM <prfop>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ prfm #0, test // PRFM #<imm5>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL1KEEP, [X25, X16] // PRFM <prfop>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ prfm #24, [X1, X18] // PRFM #<imm5>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL1KEEP, [X14, W8, UXTW] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm #12, [X8, W5, UXTW] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL2KEEP, [X16, W16, SXTW] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm #11, [X25, W11, SXTW] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL2STRM, [X3, X24, SXTX] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm #25, [X5, X2, SXTX] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL2KEEP, [X10, W29, UXTW #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #17, [X9, W27, UXTW #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm PLDL1KEEP, [X24, W0, SXTW #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #4, [X30, W25, SXTW #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL1STRM, [X18, X20, SXTX #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #19, [X29, X25, SXTX #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm PSTL3KEEP, [X2, X5, LSL #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfm #8, [X22, X3, LSL #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+ prfum PSTL1KEEP, [X7] // PRFUM <prfop>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfum PLDL2KEEP, [X7, #-37] // PRFUM <prfop>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfum #20, [X21] // PRFUM #<imm5>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfum #23, [X6, #-131] // PRFUM #<imm5>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+ prfw #3, P2, [X4] // PRFW #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfw #6, P4, [X7, #6, MUL VL] // PRFW #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfw PLDL3KEEP, P3, [X2] // PRFW <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+ prfw PSTL1KEEP, P7, [X2, #-31, MUL VL] // PRFW <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+ prfw PSTL1KEEP, P4, [X18, X21, LSL #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ No description \\ No scheduling info
+ prfw PLDL2STRM, P0, [X15, Z6.S, UXTW #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ No description \\ No scheduling info
+ prfw PSTL2KEEP, P0, [X27, Z18.D, SXTW #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ No description \\ No scheduling info
+ prfw PSTL2KEEP, P3, [X19, Z8.D, LSL #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ No description \\ No scheduling info
+ prfw #7, P7, [Z27.S] // PRFW #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfw #9, P5, [Z16.S, #72] // PRFW #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfw PLDL3KEEP, P4, [Z2.S] // PRFW <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+ prfw PSTL3KEEP, P2, [Z0.S, #40] // PRFW <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+ prfw #7, P1, [Z20.D] // PRFW #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfw #7, P2, [Z10.D, #108] // PRFW #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ prfw PSTL1KEEP, P6, [Z12.D] // PRFW <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+ prfw PSTL2STRM, P0, [Z18.D, #60] // PRFW <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+ psb CSYNC // PSB CSYNC \\ No description \\ No scheduling info
+ pssbb // PSSBB \\ No description \\ No scheduling info
+ ptest P0, P5.B // PTEST <Pg>, <Pn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P2.B // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P3.D, POW2 // PTRUE <Pd>.<T>, <pattern> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P0.H // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrue P4.S, #21 // PTRUE <Pd>.<T>, #<uimm5> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+ ptrues P3.H // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+ ptrues P3.D, VL32 // PTRUES <Pd>.<T>, <pattern> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+ ptrues P0.B // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+ ptrues P2.D, #12 // PTRUES <Pd>.<T>, #<uimm5> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+ punpkhi P4.H, P4.B // PUNPKHI <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+ punpklo P1.H, P4.B // PUNPKLO <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+ raddhn V17.2S, V22.2D, V5.2D // RADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ raddhn2 V21.4S, V11.2D, V1.2D // RADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ rbit V16.16B, V21.16B // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
+ rbit W27, W10 // RBIT <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rbit X30, X0 // RBIT <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rbit Z23.S, P3/M, Z10.S // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ rdffr P2.B, P1/Z // RDFFR <Pd>.B, <Pg>/Z \\ Read first fault register, predicated \\ 1 3 3 0.5 V1UnitM0[2]
+ rdffr P5.B // RDFFR <Pd>.B \\ Read first fault register, unpredicated \\ 1 2 2 1.0 V1UnitM0
+ rdffrs P7.B, P2/Z // RDFFRS <Pd>.B, <Pg>/Z \\ Read first fault register and set flags \\ 1 4 4 0.33 V1UnitM[6]
+ rdvl X20, #-20 // RDVL <Xd>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ ret // RET \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+ ret X14 // RET {<Xn>} \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+ rev P1.H, P2.H // REV <Pd>.<T>, <Pn>.<T> \\ Predicate reverse \\ 1 2 2 1.0 V1UnitM0
+ rev Z11.D, Z24.D // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ rev W19, W20 // REV <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev X30, X15 // REV <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev16 V5.16B, V26.16B // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+ rev16 W1, W25 // REV16 <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev16 X27, X11 // REV16 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev32 V22.8H, V4.8H // REV32 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+ rev32 X30, X6 // REV32 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev64 X5, X2 // REV64 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+ rev64 V0.2S, V19.2S // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+ revb Z3.D, P2/M, Z21.D // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ revh Z1.D, P5/M, Z19.D // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ revw Z16.D, P1/M, Z3.D // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ ror W20, W13, #21 // ROR <Wd>, <Ws>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ ror X5, X8, #7 // ROR <Xd>, <Xs>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+ ror W29, W26, W0 // ROR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ ror X4, X13, X3 // ROR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+ rorv W26, W0, W28 // RORV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ rorv X21, X29, X17 // RORV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+ rshrn V24.8B, V0.8H, #4 // RSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn V8.4H, V24.4S, #16 // RSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn V12.2S, V12.2D, #28 // RSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn2 V1.16B, V16.8H, #6 // RSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn2 V1.8H, V28.4S, #3 // RSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rshrn2 V20.4S, V19.2D, #14 // RSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ rsubhn V3.8B, V9.8H, V16.8H // RSUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ rsubhn2 V31.4S, V12.2D, V15.2D // RSUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ saba V8.16B, V27.16B, V13.16B // SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+ sabal V2.2D, V5.2S, V31.2S // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ sabal2 V21.2D, V15.4S, V13.4S // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ sabd V12.2S, V11.2S, V27.2S // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+ sabd Z14.S, P1/M, Z14.S, Z23.S // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sabdl V28.2D, V4.2S, V19.2S // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ sabdl2 V10.8H, V30.16B, V4.16B // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ sadalp V3.4H, V5.8B // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+ saddl V7.8H, V3.8B, V23.8B // SADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ saddl2 V21.4S, V5.8H, V10.8H // SADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ saddlp V13.8H, V29.16B // SADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ saddlv H18, V28.8B // SADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ saddlv H30, V4.16B // SADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ saddlv S24, V29.4H // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ saddlv S22, V23.8H // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ saddlv D2, V27.4S // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ saddv D19, P6, Z1.B // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+ saddv D7, P2, Z14.H // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+ saddv D4, P7, Z27.S // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ saddw V8.4S, V0.4S, V1.4H // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ saddw2 V24.8H, V10.8H, V30.16B // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sbc W0, W16, W1 // SBC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sbc X19, X3, X9 // SBC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sbcs W26, W28, W0 // SBCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ sbcs X8, X26, X29 // SBCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ sbfiz W14, W5, #21, #8 // SBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+ sbfiz X14, X1, #56, #2 // SBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+ sbfm W24, W11, #27, #19 // SBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ sbfm X14, X1, #36, #55 // SBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ sbfx W16, W16, #31, #1 // SBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ sbfx X14, X28, #53, #8 // SBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ scvtf H18, W17, #30 // SCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S14, W9, #19 // SCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D16, W3, #13 // SCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H28, X25, #23 // SCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S27, X19, #5 // SCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D15, X22, #32 // SCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H22, W7 // SCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S22, W10 // SCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D23, W6 // SCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H21, X12 // SCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf S25, X28 // SCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf D12, X0 // SCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ scvtf H4, H8, #9 // SCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf S29, S12, #1 // SCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf D1, D12, #26 // SCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V25.4H, V13.4H, #8 // SCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ scvtf V4.8H, V8.8H, #10 // SCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ scvtf V5.2S, V2.2S, #26 // SCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V2.4S, V24.4S, #10 // SCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ scvtf V11.2D, V2.2D, #42 // SCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf H5, H14 // SCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf S5, S16 // SCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf D12, D11 // SCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V22.4H, V10.4H // SCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ scvtf V16.8H, V13.8H // SCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ scvtf V9.2S, V31.2S // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf V2.4S, V7.4S // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ scvtf V18.2D, V11.2D // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ scvtf Z3.H, P3/M, Z29.H // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
+ scvtf Z1.H, P5/M, Z27.S // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+ scvtf Z30.S, P4/M, Z29.S // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+ scvtf Z18.D, P3/M, Z16.S // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+ scvtf Z18.H, P1/M, Z14.D // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ scvtf Z10.S, P1/M, Z11.D // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ scvtf Z3.D, P2/M, Z27.D // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ sdiv W6, W28, W24 // SDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
+ sdiv X19, X2, X14 // SDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
+ sdiv Z24.S, P1/M, Z24.S, Z14.S // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+ sdiv Z7.D, P6/M, Z7.D, Z20.D // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+ sdivr Z10.S, P2/M, Z10.S, Z7.S // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+ sdivr Z0.D, P3/M, Z0.D, Z9.D // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+ sdot Z6.S, Z29.B, Z0.B[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ sdot Z0.D, Z18.H, Z10.H[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ sdot Z28.S, Z30.B, Z14.B // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ sdot Z19.D, Z5.H, Z8.H // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ sdot V2.4S, V27.16B, V5.4B[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ sdot V3.2S, V20.8B, V10.8B // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ sel P1.B, P7, P5.B, P4.B // SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+ sel Z0.H, P7, Z13.H, Z13.H // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+ setffr // SETFFR \\ Set first fault register \\ 1 2 2 1.0 V1UnitM0
+ sev // SEV \\ No description \\ No scheduling info
+ sevl // SEVL \\ No description \\ No scheduling info
+ shadd V25.16B, V1.16B, V10.16B // SHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ shl D17, D3, #16 // SHL <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V23.8B, V18.8B, #6 // SHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V0.8H, V23.8H, #10 // SHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V0.4S, V18.4S, #30 // SHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shl V20.2D, V28.2D, #40 // SHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll V3.8H, V13.8B, #8 // SHLL <Vd>.8H, <Vn>.8B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll V26.4S, V18.4H, #16 // SHLL <Vd>.4S, <Vn>.4H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll V4.2D, V25.2S, #32 // SHLL <Vd>.2D, <Vn>.2S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll2 V12.8H, V28.16B, #8 // SHLL2 <Vd>.8H, <Vn>.16B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll2 V11.4S, V22.8H, #16 // SHLL2 <Vd>.4S, <Vn>.8H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shll2 V2.2D, V29.4S, #32 // SHLL2 <Vd>.2D, <Vn>.4S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn V27.8B, V23.8H, #3 // SHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn V17.4H, V1.4S, #13 // SHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn V13.2S, V0.2D, #12 // SHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn2 V4.16B, V29.8H, #8 // SHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn2 V9.8H, V18.4S, #10 // SHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shrn2 V5.4S, V12.2D, #16 // SHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ shsub V15.8H, V5.8H, V27.8H // SHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sli D7, D19, #53 // SLI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V16.16B, V26.16B, #7 // SLI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V14.4H, V10.4H, #15 // SLI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V29.2S, V14.2S, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sli V25.2D, V21.2D, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ smaddl X17, W27, W30, X3 // SMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ smax Z3.S, Z3.S, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smax Z0.B, P5/M, Z0.B, Z20.B // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smax V30.16B, V3.16B, V30.16B // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ smaxp V21.8H, V16.8H, V7.8H // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ smaxv B4, V30.8B // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ smaxv B15, V16.16B // SMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ smaxv H28, V14.4H // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ smaxv H6, V19.8H // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ smaxv S3, V14.4S // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ smaxv B19, P4, Z14.B // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+ smaxv H0, P6, Z20.H // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+ smaxv S11, P2, Z28.S // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ smaxv D24, P5, Z24.D // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ smc #32343 // SMC #<imm> \\ No description \\ No scheduling info
+ smin Z21.S, Z21.S, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smin Z22.S, P0/M, Z22.S, Z30.S // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smin V29.4S, V24.4S, V24.4S // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ sminp V7.8H, V27.8H, V7.8H // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ sminv B6, V11.8B // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ sminv B24, V8.16B // SMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ sminv H24, V23.4H // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ sminv H2, V9.8H // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ sminv S16, V15.4S // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ sminv B4, P2, Z10.B // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+ sminv H15, P7, Z10.H // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+ sminv S29, P0, Z27.S // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ sminv D17, P2, Z18.D // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ smlal V16.4S, V9.4H, V11.H[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal V0.2D, V25.2S, V1.S[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal2 V1.4S, V9.8H, V0.H[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal2 V30.2D, V22.4S, V7.S[2] // SMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal V25.8H, V24.8B, V28.8B // SMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlal2 V30.4S, V31.8H, V13.8H // SMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl V14.4S, V23.4H, V12.H[7] // SMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl V25.2D, V27.2S, V1.S[1] // SMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl2 V12.4S, V11.8H, V12.H[0] // SMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl2 V11.2D, V28.4S, V7.S[2] // SMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl V11.4S, V14.4H, V15.4H // SMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smlsl2 V21.4S, V27.8H, V16.8H // SMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ smmla V0.4S, V17.16B, V31.16B // SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+ smnegl X3, W23, W18 // SMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ smov W15, V22.B[0] // SMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov W6, V28.B[9] // SMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov W26, V27.H[0] // SMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov W18, V29.H[6] // SMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov X21, V0.B[0] // SMOV <Xd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov X16, V29.B[8] // SMOV <Xd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov X9, V27.H[0] // SMOV <Xd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov X4, V21.H[2] // SMOV <Xd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov X15, V3.S[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smov X5, V29.S[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ smsubl X8, W24, W13, X6 // SMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ smulh Z11.B, P5/M, Z11.B, Z17.B // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ smulh Z8.H, P0/M, Z8.H, Z4.H // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ smulh Z27.S, P7/M, Z27.S, Z30.S // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ smulh Z4.D, P7/M, Z4.D, Z28.D // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ smulh X8, X29, X17 // SMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+ smull X19, W0, W6 // SMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ smull V3.4S, V26.4H, V1.H[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull V31.2D, V23.2S, V6.S[2] // SMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull2 V13.4S, V18.8H, V0.H[3] // SMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull2 V11.2D, V1.4S, V7.S[0] // SMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull V28.2D, V26.2S, V20.2S // SMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ smull2 V7.2D, V14.4S, V15.4S // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqabs D15, D26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqabs V25.8H, V24.8H // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqadd Z1.B, Z1.B, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z18.H, Z18.H, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z3.D, Z3.D, #158, LSL #0 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z19.D, Z27.D, Z28.D // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd H12, H18, H10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqadd V15.2S, V13.2S, V28.2S // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqdecb X26, W26 // SQDECB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X16, W16, VL64 // SQDECB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X4, W4, VL1, MUL #16 // SQDECB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X4 // SQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X28, VL6 // SQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecb X20, VL7, MUL #4 // SQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X1, W1 // SQDECD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X11, W11, MUL3 // SQDECD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X14, W14, VL2, MUL #16 // SQDECD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X18 // SQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X11, VL5 // SQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd X21, ALL, MUL #13 // SQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecd Z27.D // SQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecd Z2.D, VL128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecd Z23.D, VL1, MUL #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdech X7, W7 // SQDECH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X10, W10, VL128 // SQDECH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X16, W16, VL6, MUL #11 // SQDECH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X6 // SQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X17, VL128 // SQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech X27, VL128, MUL #4 // SQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdech Z16.H // SQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdech Z21.H, VL6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdech Z7.H, MUL3, MUL #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecp X1, P4.B, W1 // SQDECP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqdecp X26, P6.D // SQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqdecp Z10.D, P3 // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ sqdecw X13, W13 // SQDECW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X2, W2, POW2 // SQDECW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X26, W26, VL8, MUL #10 // SQDECW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X10 // SQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X17, VL128 // SQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw X13, MUL4, MUL #3 // SQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqdecw Z7.S // SQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecw Z10.S, POW2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecw Z28.S, VL2, MUL #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdmlal S23, H16, V4.H[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal D12, S18, V3.S[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal V20.4S, V30.4H, V12.H[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal V11.2D, V24.2S, V0.S[3] // SQDMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal2 V2.4S, V17.8H, V5.H[6] // SQDMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal2 V23.2D, V30.4S, V6.S[0] // SQDMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal D16, S12, S15 // SQDMLAL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal V8.4S, V24.4H, V31.4H // SQDMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlal2 V29.4S, V11.8H, V13.8H // SQDMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl S26, H21, V11.H[1] // SQDMLSL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl D6, S16, V3.S[1] // SQDMLSL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl V4.4S, V22.4H, V13.H[2] // SQDMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl V26.2D, V7.2S, V3.S[0] // SQDMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl2 V2.4S, V28.8H, V4.H[6] // SQDMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl2 V4.2D, V3.4S, V3.S[2] // SQDMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl D13, S21, S8 // SQDMLSL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl V11.4S, V19.4H, V5.4H // SQDMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmlsl2 V27.4S, V8.8H, V22.8H // SQDMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh H14, H17, V6.H[6] // SQDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh S19, S6, V6.S[3] // SQDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh V8.4H, V16.4H, V5.H[4] // SQDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh V16.2S, V24.2S, V7.S[2] // SQDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh H26, H21, H17 // SQDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmulh V20.2S, V11.2S, V29.2S // SQDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqdmull S25, H5, V1.H[3] // SQDMULL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull D29, S23, V0.S[2] // SQDMULL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull V8.4S, V19.4H, V1.H[2] // SQDMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull V20.2D, V10.2S, V6.S[2] // SQDMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull2 V10.4S, V25.8H, V0.H[7] // SQDMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull2 V4.2D, V29.4S, V2.S[3] // SQDMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull D19, S2, S0 // SQDMULL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull V14.2D, V23.2S, V13.2S // SQDMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqdmull2 V12.4S, V11.8H, V1.8H // SQDMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ sqincb X12, W12 // SQINCB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X1, W1, VL8 // SQINCB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X16, W16, VL2, MUL #16 // SQINCB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X5 // SQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X4, VL6 // SQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincb X30, ALL, MUL #7 // SQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X28, W28 // SQINCD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X16, W16, VL8 // SQINCD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X22, W22, VL6, MUL #16 // SQINCD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X10 // SQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X17, VL5 // SQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd X13, VL64, MUL #1 // SQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincd Z24.D // SQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincd Z10.D, VL128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincd Z29.D, VL128, MUL #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqinch X28, W28 // SQINCH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X30, W30, VL1 // SQINCH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X16, W16, VL4, MUL #2 // SQINCH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X23 // SQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X10, VL64 // SQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch X16, POW2, MUL #2 // SQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqinch Z3.H // SQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqinch Z23.H, VL4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqinch Z6.H, VL128, MUL #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincp X13, P2.H, W13 // SQINCP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqincp X0, P7.H // SQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ sqincp Z9.H, P1 // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ sqincw X24, W24 // SQINCW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X16, W16, MUL4 // SQINCW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X27, W27, VL32, MUL #15 // SQINCW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X29 // SQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X25, VL7 // SQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw X21, VL8, MUL #3 // SQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ sqincw Z30.S // SQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincw Z8.S, MUL3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincw Z0.S, VL5, MUL #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqneg D24, D22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqneg V30.16B, V15.16B // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqrdmlah H14, H4, V6.H[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah S24, S17, V6.S[2] // SQRDMLAH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah V17.4H, V18.4H, V4.H[7] // SQRDMLAH <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah V10.2S, V17.2S, V3.S[3] // SQRDMLAH <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah S3, S3, S5 // SQRDMLAH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlah V16.8H, V30.8H, V28.8H // SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh H13, H26, V4.H[2] // SQRDMLSH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh S26, S29, V7.S[0] // SQRDMLSH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V1.8H, V21.8H, V8.H[1] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V8.4H, V11.4H, V1.H[3] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V20.2S, V29.2S, V4.S[3] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V21.4S, V9.4S, V1.S[0] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh S30, S20, S13 // SQRDMLSH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmlsh V20.4H, V2.4H, V23.4H // SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh H3, H25, V2.H[1] // SQRDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh S9, S24, V4.S[3] // SQRDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh V0.8H, V15.8H, V0.H[5] // SQRDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh V6.2S, V29.2S, V4.S[2] // SQRDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh H5, H2, H20 // SQRDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrdmulh V31.2S, V17.2S, V4.2S // SQRDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+ sqrshl D6, D1, D30 // SQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshl V15.8B, V26.8B, V21.8B // SQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn B6, H24, #3 // SQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn H11, S22, #8 // SQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn S4, D9, #13 // SQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn V31.8B, V31.8H, #2 // SQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn V27.4H, V11.4S, #8 // SQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn V4.2S, V30.2D, #10 // SQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn2 V11.16B, V30.8H, #7 // SQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn2 V14.8H, V3.4S, #12 // SQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrn2 V13.4S, V28.2D, #24 // SQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun B5, H0, #3 // SQRSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun H25, S11, #7 // SQRSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun S15, D18, #2 // SQRSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun V0.8B, V3.8H, #7 // SQRSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun V5.4H, V8.4S, #7 // SQRSHRUN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun V7.2S, V8.2D, #13 // SQRSHRUN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun2 V14.16B, V14.8H, #3 // SQRSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun2 V9.8H, V16.4S, #10 // SQRSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqrshrun2 V12.4S, V23.2D, #30 // SQRSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl B15, B3, #4 // SQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl H21, H0, #5 // SQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl S26, S9, #24 // SQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl D8, D23, #17 // SQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V25.16B, V26.16B, #5 // SQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V29.4H, V1.4H, #7 // SQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V0.2S, V5.2S, #1 // SQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V11.2D, V2.2D, #23 // SQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl S17, S4, S23 // SQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshl V23.16B, V23.16B, V23.16B // SQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshlu B3, B27, #5 // SQSHLU B<d>, B<n>, #<shiftb> \\ No description \\ No scheduling info
+ sqshlu H23, H4, #6 // SQSHLU H<d>, H<n>, #<shifth> \\ No description \\ No scheduling info
+ sqshlu S29, S29, #30 // SQSHLU S<d>, S<n>, #<shifts> \\ No description \\ No scheduling info
+ sqshlu D14, D5, #22 // SQSHLU D<d>, D<n>, #<shiftd> \\ No description \\ No scheduling info
+ sqshlu V11.8B, V17.8B, #6 // SQSHLU <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ No description \\ No scheduling info
+ sqshlu V18.8H, V8.8H, #14 // SQSHLU <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ No description \\ No scheduling info
+ sqshlu V25.4S, V7.4S, #13 // SQSHLU <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ No description \\ No scheduling info
+ sqshlu V19.2D, V14.2D, #39 // SQSHLU <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ No description \\ No scheduling info
+ sqshrn B17, H30, #7 // SQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn H30, S15, #5 // SQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn S16, D0, #20 // SQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn V3.8B, V25.8H, #1 // SQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn V23.4H, V14.4S, #6 // SQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn V6.2S, V29.2D, #10 // SQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn2 V31.16B, V31.8H, #8 // SQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn2 V13.8H, V6.4S, #13 // SQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrn2 V30.4S, V0.2D, #1 // SQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun B3, H16, #3 // SQSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun H11, S10, #7 // SQSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun S18, D1, #13 // SQSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun V21.8B, V27.8H, #5 // SQSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun V18.4H, V19.4S, #2 // SQSHRUN <Vd>.4H, <Vn>.4S, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun V2.2S, V14.2D, #3 // SQSHRUN <Vd>.2S, <Vn>.2D, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun2 V10.16B, V28.8H, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun2 V4.8H, V28.4S, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqshrun2 V7.4S, V18.2D, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ sqsub Z13.B, Z13.B, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z28.H, Z28.H, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z11.S, Z11.S, #14, LSL #0 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z28.S, Z9.S, Z12.S // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub B3, B13, B12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqsub V20.8H, V18.8H, V12.8H // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sqxtn B11, H22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtn V3.2S, V17.2D // SQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtn2 V17.8H, V27.4S // SQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtun B30, H18 // SQXTUN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtun V26.8B, V21.8H // SQXTUN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ sqxtun2 V22.16B, V6.8H // SQXTUN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ srhadd V29.8B, V3.8B, V8.8B // SRHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ sri D30, D17, #61 // SRI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V23.16B, V30.16B, #2 // SRI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V1.4H, V0.4H, #4 // SRI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V28.2S, V6.2S, #16 // SRI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ sri V8.2D, V19.2D, #40 // SRI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+ srshl D30, D8, D8 // SRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ srshl V20.8B, V23.8B, V27.8B // SRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr D20, D18, #27 // SRSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V20.8B, V0.8B, #7 // SRSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V27.8H, V19.8H, #9 // SRSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V8.2S, V20.2S, #31 // SRSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srshr V31.2D, V17.2D, #33 // SRSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ srsra D13, D10, #25 // SRSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V31.16B, V15.16B, #5 // SRSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V14.4H, V27.4H, #7 // SRSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V17.2S, V8.2S, #8 // SRSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ srsra V22.2D, V4.2D, #12 // SRSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssbb // SSBB \\ No description \\ No scheduling info
+ sshl D29, D30, D9 // SSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ sshl V13.2D, V7.2D, V27.2D // SSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll V9.8H, V2.8B, #0 // SSHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll V12.4S, V3.4H, #4 // SSHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll V17.2D, V6.2S, #22 // SSHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll2 V28.8H, V12.16B, #7 // SSHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll2 V29.4S, V22.8H, #7 // SSHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshll2 V17.2D, V13.4S, #22 // SSHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr D3, D18, #10 // SSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V20.8B, V28.8B, #2 // SSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V20.4H, V23.4H, #10 // SSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V13.2S, V23.2S, #2 // SSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sshr V3.2D, V8.2D, #61 // SSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ssra D28, D30, #51 // SSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V9.8B, V18.8B, #2 // SSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V21.4H, V24.4H, #3 // SSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V28.2S, V17.2S, #6 // SSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssra V0.2D, V23.2D, #35 // SSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ssubl V13.4S, V9.4H, V5.4H // SSUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ssubl2 V18.4S, V29.8H, V17.8H // SSUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ssubw V5.2D, V13.2D, V4.2S // SSUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ssubw2 V4.4S, V26.4S, V31.8H // SSUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ st1 { V18.8B }, [X15] // ST1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V31.16B }, [X29] // ST1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V19.4H }, [X7] // ST1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V27.8H }, [X17] // ST1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V25.2S }, [X6] // ST1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V22.4S }, [X19] // ST1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V20.1D }, [X10] // ST1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V8.2D }, [X15] // ST1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V16.8B }, [X14], #8 // ST1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V10.16B }, [X8], #16 // ST1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V29.4H }, [X17], #8 // ST1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V14.8H }, [X28], #16 // ST1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V18.2S }, [X20], #8 // ST1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V28.4S }, [X1], #16 // ST1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V17.1D }, [X27], #8 // ST1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V30.2D }, [X4], #16 // ST1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V13.8B }, [X8], X7 // ST1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V4.16B }, [X7], X26 // ST1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V17.4H }, [X10], X4 // ST1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V18.8H }, [X15], X1 // ST1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V6.2S }, [X17], X24 // ST1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V26.4S }, [X20], X29 // ST1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V13.1D }, [X3], X20 // ST1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V15.2D }, [X21], X11 // ST1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V8.8B, V9.8B }, [X18] // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V1.16B, V2.16B }, [X4] // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V22.4H, V23.4H }, [X22] // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V18.8H, V19.8H }, [X2] // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V13.2S, V14.2S }, [X9] // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V15.4S, V16.4S }, [X12] // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V21.1D, V22.1D }, [X29] // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ st1 { V26.2D, V27.2D }, [X28] // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V23.8B, V24.8B }, [X4], #16 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V15.16B, V16.16B }, [X16], #32 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V7.4H, V8.4H }, [X7], #16 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V8.8H, V9.8H }, [X1], #32 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V23.2S, V24.2S }, [X7], #16 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V8.4S, V9.4S }, [X15], #32 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V14.1D, V15.1D }, [X11], #16 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V12.2D, V13.2D }, [X2], #32 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V3.8B, V4.8B }, [X28], X14 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V19.16B, V20.16B }, [X13], X7 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V28.4H, V29.4H }, [X14], X5 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V9.8H, V10.8H }, [X28], X9 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V10.2S, V11.2S }, [X10], X2 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V13.4S, V14.4S }, [X8], X15 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V5.1D, V6.1D }, [X9], X14 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V14.2D, V15.2D }, [X24], X1 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V15.8B, V16.8B, V17.8B }, [X0] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V27.16B, V28.16B, V29.16B }, [X18] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V13.4H, V14.4H, V15.4H }, [X7] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V8.8H, V9.8H, V10.8H }, [X16] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V12.2S, V13.2S, V14.2S }, [X3] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V19.4S, V20.4S, V21.4S }, [X7] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V5.1D, V6.1D, V7.1D }, [X3] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V13.2D, V14.2D, V15.2D }, [X27] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+ st1 { V3.8B, V4.8B, V5.8B }, [X21], #24 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V25.16B, V26.16B, V27.16B }, [X4], #48 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V24.4H, V25.4H, V26.4H }, [X9], #24 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V0.8H, V1.8H, V2.8H }, [X7], #48 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V3.2S, V4.2S, V5.2S }, [X4], #24 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V25.4S, V26.4S, V27.4S }, [X14], #48 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V7.1D, V8.1D, V9.1D }, [X13], #24 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V19.2D, V20.2D, V21.2D }, [X5], #48 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V5.8B, V6.8B, V7.8B }, [X17], X25 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V12.16B, V13.16B, V14.16B }, [X29], X23 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V18.4H, V19.4H, V20.4H }, [X0], X14 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V16.8H, V17.8H, V18.8H }, [X1], X18 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V1.2S, V2.2S, V3.2S }, [X15], X29 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V2.4S, V3.4S, V4.4S }, [X29], X6 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V8.1D, V9.1D, V10.1D }, [X13], X27 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V8.2D, V9.2D, V10.2D }, [X18], X19 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+ st1 { V21.8B, V22.8B, V23.8B, V24.8B }, [X14] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V18.16B, V19.16B, V20.16B, V21.16B }, [X29] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+ st1 { V23.4H, V24.4H, V25.4H, V26.4H }, [X24] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V7.8H, V8.8H, V9.8H, V10.8H }, [X19] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+ st1 { V6.2S, V7.2S, V8.2S, V9.2S }, [X13] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V26.4S, V27.4S, V28.4S, V29.4S }, [X12] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+ st1 { V0.1D, V1.1D, V2.1D, V3.1D }, [X10] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+ st1 { V25.2D, V26.2D, V27.2D, V28.2D }, [X19] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+ st1 { V27.8B, V28.8B, V29.8B, V30.8B }, [X17], #32 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V26.16B, V27.16B, V28.16B, V29.16B }, [X0], #64 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V18.4H, V19.4H, V20.4H, V21.4H }, [X22], #32 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V12.8H, V13.8H, V14.8H, V15.8H }, [X13], #64 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V13.2S, V14.2S, V15.2S, V16.2S }, [X25], #32 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V4.4S, V5.4S, V6.4S, V7.4S }, [X11], #64 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V7.1D, V8.1D, V9.1D, V10.1D }, [X13], #32 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V12.2D, V13.2D, V14.2D, V15.2D }, [X25], #64 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V21.8B, V22.8B, V23.8B, V24.8B }, [X25], X28 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V26.16B, V27.16B, V28.16B, V29.16B }, [X24], X5 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V20.4H, V21.4H, V22.4H, V23.4H }, [X25], X19 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V20.8H, V21.8H, V22.8H, V23.8H }, [X18], X0 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V4.2S, V5.2S, V6.2S, V7.2S }, [X9], X5 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V7.4S, V8.4S, V9.4S, V10.4S }, [X12], X30 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V23.1D, V24.1D, V25.1D, V26.1D }, [X23], X4 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+ st1 { V20.2D, V21.2D, V22.2D, V23.2D }, [X7], X14 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+ st1 { V1.B }[5], [X1] // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V0.H }[2], [X1] // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V31.S }[1], [X16] // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V15.D }[1], [X8] // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, D \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+ st1 { V15.B }[1], [X12], #1 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V16.B }[3], [X0], X2 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V29.H }[2], [X27], #2 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V15.H }[4], [X30], X9 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V3.S }[1], [X24], #4 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V26.S }[0], [X2], X30 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V19.D }[1], [X9], #8 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1 { V29.D }[0], [X26], X22 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+ st1b { Z7.H }, P2, [X14] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1b { Z16.S }, P4, [X20, #3, MUL VL] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1b { Z17.S }, P3, [X20, X0] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1b { Z0.D }, P4, [X11, Z13.D, UXTW] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1b { Z16.S }, P4, [X19, Z25.S, SXTW] // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1b { Z10.D }, P3, [X12, Z21.D] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1b { Z17.S }, P7, [Z28.S] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1b { Z16.S }, P0, [Z25.S, #7] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1b { Z15.D }, P6, [Z27.D] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1b { Z2.D }, P0, [Z21.D, #24] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1d { Z10.D }, P2, [X26, Z5.D, SXTW #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1d { Z18.D }, P2, [X7, Z1.D, UXTW] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1d { Z9.D }, P6, [X6, Z12.D, LSL #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1d { Z3.D }, P3, [X1, Z30.D] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1d { Z18.D }, P0, [Z7.D] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1d { Z4.D }, P2, [Z2.D, #136] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1h { Z28.S }, P3, [X18] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1h { Z23.H }, P1, [X14, #-8, MUL VL] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ st1h { Z25.S }, P3, [X17, X8, LSL #1] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+ st1h { Z12.S }, P3, [X24, Z30.S, SXTW #1] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1h { Z26.D }, P5, [X9, Z17.D, UXTW #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1h { Z23.D }, P1, [X5, Z25.D, SXTW] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1h { Z14.S }, P4, [X22, Z17.S, SXTW] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1h { Z23.D }, P3, [X25, Z11.D, LSL #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1h { Z0.D }, P4, [X21, Z21.D] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1h { Z29.S }, P5, [Z9.S] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1h { Z4.S }, P7, [Z23.S, #40] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1h { Z27.D }, P2, [Z3.D] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1h { Z11.D }, P6, [Z7.D, #38] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1w { Z25.S }, P1, [X9, Z28.S, SXTW #2] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1w { Z13.D }, P3, [X16, Z9.D, SXTW #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1w { Z21.D }, P1, [X24, Z23.D, SXTW] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1w { Z17.S }, P1, [X5, Z22.S, UXTW] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1w { Z28.D }, P1, [X5, Z8.D, LSL #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1w { Z26.D }, P3, [X3, Z0.D] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1w { Z28.S }, P6, [Z21.S] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1w { Z26.S }, P3, [Z24.S, #120] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+ st1w { Z3.D }, P0, [Z12.D] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st1w { Z17.D }, P2, [Z1.D, #80] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+ st2 { V14.8B, V15.8B }, [X2] // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V6.16B, V7.16B }, [X23] // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st2 { V10.4H, V11.4H }, [X18] // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V10.8H, V11.8H }, [X18] // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st2 { V25.2S, V26.2S }, [X29] // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V26.4S, V27.4S }, [X14] // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st2 { V10.2D, V11.2D }, [X1] // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st2 { V21.8B, V22.8B }, [X22], #16 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V26.16B, V27.16B }, [X2], #32 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V19.4H, V20.4H }, [X27], #16 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V28.8H, V29.8H }, [X22], #32 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V1.2S, V2.2S }, [X26], #16 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V19.4S, V20.4S }, [X7], #32 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V22.2D, V23.2D }, [X18], #32 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V29.8B, V30.8B }, [X9], X2 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V17.16B, V18.16B }, [X4], X0 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V9.4H, V10.4H }, [X7], X25 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V8.8H, V9.8H }, [X11], X8 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V17.2S, V18.2S }, [X2], X8 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V9.4S, V10.4S }, [X23], X12 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V29.2D, V30.2D }, [X25], X11 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st2 { V21.B, V22.B }[15], [X15] // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V28.H, V29.H }[2], [X6] // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V14.S, V15.S }[1], [X25] // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V17.D, V18.D }[1], [X1] // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, D \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+ st2 { V9.B, V10.B }[15], [X12], #2 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V19.B, V20.B }[9], [X27], X28 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V18.H, V19.H }[3], [X30], #4 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V13.H, V14.H }[5], [X23], X24 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V23.S, V24.S }[1], [X22], #8 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V16.S, V17.S }[3], [X12], X16 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V27.D, V28.D }[0], [X16], #16 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2 { V6.D, V7.D }[1], [X14], X5 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+ st2b { Z19.B, Z20.B }, P1, [X18] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2b { Z26.B, Z27.B }, P7, [X15, #-6, MUL VL] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2b { Z19.B, Z20.B }, P1, [X23, X27] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2d { Z29.D, Z30.D }, P4, [X8] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2d { Z16.D, Z17.D }, P3, [X20, #14, MUL VL] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2d { Z17.D, Z18.D }, P7, [X2, X28, LSL #3] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2h { Z5.H, Z6.H }, P7, [X23] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2h { Z11.H, Z12.H }, P6, [X4, #10, MUL VL] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2h { Z3.H, Z4.H }, P3, [X22, X16, LSL #1] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 3 4 4 1.0 V1UnitL01[2],V1UnitS[2],V1UnitV[2]
+ st2w { Z14.S, Z15.S }, P4, [X17] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2w { Z9.S, Z10.S }, P5, [X19, #-8, MUL VL] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st2w { Z5.S, Z6.S }, P3, [X23, X13, LSL #2] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+ st3 { V10.8B, V11.8B, V12.8B }, [X18] // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V26.16B, V27.16B, V28.16B }, [X4] // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V25.4H, V26.4H, V27.4H }, [X11] // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V0.8H, V1.8H, V2.8H }, [X0] // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V19.2S, V20.2S, V21.2S }, [X30] // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V24.4S, V25.4S, V26.4S }, [X8] // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V24.2D, V25.2D, V26.2D }, [X25] // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, D \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+ st3 { V25.8B, V26.8B, V27.8B }, [X23], #24 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V9.16B, V10.16B, V11.16B }, [X26], #48 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V24.4H, V25.4H, V26.4H }, [X3], #24 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V23.8H, V24.8H, V25.8H }, [X22], #48 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V7.2S, V8.2S, V9.2S }, [X8], #24 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V11.4S, V12.4S, V13.4S }, [X15], #48 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V1.2D, V2.2D, V3.2D }, [X4], #48 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V16.8B, V17.8B, V18.8B }, [X26], X2 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V9.16B, V10.16B, V11.16B }, [X3], X18 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V2.4H, V3.4H, V4.4H }, [X4], X4 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V27.8H, V28.8H, V29.8H }, [X27], X8 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V26.2S, V27.2S, V28.2S }, [X2], X25 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V5.4S, V6.4S, V7.4S }, [X18], X29 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V26.2D, V27.2D, V28.2D }, [X14], X5 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+ st3 { V8.B, V9.B, V10.B }[4], [X18] // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V11.H, V12.H, V13.H }[4], [X0] // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V9.S, V10.S, V11.S }[2], [X20] // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V16.D, V17.D, V18.D }[0], [X13] // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st3 { V26.B, V27.B, V28.B }[1], [X12], #3 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V27.B, V28.B, V29.B }[15], [X19], X23 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V24.H, V25.H, V26.H }[2], [X14], #6 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V1.H, V2.H, V3.H }[2], [X0], X23 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V25.S, V26.S, V27.S }[2], [X10], #12 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V8.S, V9.S, V10.S }[0], [X11], X20 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V19.D, V20.D, V21.D }[1], [X5], #24 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3 { V10.D, V11.D, V12.D }[0], [X12], X11 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st3b { Z0.B, Z1.B, Z2.B }, P6, [X26] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3b { Z22.B, Z23.B, Z24.B }, P6, [X25, #3, MUL VL] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3b { Z14.B, Z15.B, Z16.B }, P2, [X29, X27] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+ st3d { Z6.D, Z7.D, Z8.D }, P2, [X12] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3d { Z20.D, Z21.D, Z22.D }, P5, [X15, #9, MUL VL] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3d { Z15.D, Z16.D, Z17.D }, P7, [X0, X9, LSL #3] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+ st3h { Z17.H, Z18.H, Z19.H }, P3, [X14] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3h { Z21.H, Z22.H, Z23.H }, P0, [X15, #6, MUL VL] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3h { Z2.H, Z3.H, Z4.H }, P3, [X21, X9, LSL #1] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+ st3w { Z9.S, Z10.S, Z11.S }, P3, [X29] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3w { Z11.S, Z12.S, Z13.S }, P4, [X13, #15, MUL VL] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+ st3w { Z19.S, Z20.S, Z21.S }, P2, [X22, X28, LSL #2] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+ st4 { V17.8B, V18.8B, V19.8B, V20.8B }, [X8] // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+ st4 { V7.16B, V8.16B, V9.16B, V10.16B }, [X15] // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+ st4 { V5.4H, V6.4H, V7.4H, V8.4H }, [X13] // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+ st4 { V11.8H, V12.8H, V13.8H, V14.8H }, [X1] // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+ st4 { V15.2S, V16.2S, V17.2S, V18.2S }, [X18] // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+ st4 { V21.4S, V22.4S, V23.4S, V24.4S }, [X6] // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+ st4 { V25.2D, V26.2D, V27.2D, V28.2D }, [X16] // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, D \\ 2 4 4 0.25 V1UnitV01[8],V1UnitL01[8]
+ st4 { V16.8B, V17.8B, V18.8B, V19.8B }, [X24], #32 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+ st4 { V2.16B, V3.16B, V4.16B, V5.16B }, [X13], #64 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+ st4 { V17.4H, V18.4H, V19.4H, V20.4H }, [X3], #32 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+ st4 { V18.8H, V19.8H, V20.8H, V21.8H }, [X5], #64 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+ st4 { V26.2S, V27.2S, V28.2S, V29.2S }, [X17], #32 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+ st4 { V21.4S, V22.4S, V23.4S, V24.4S }, [X7], #64 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+ st4 { V27.2D, V28.2D, V29.2D, V30.2D }, [X25], #64 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
+ st4 { V24.8B, V25.8B, V26.8B, V27.8B }, [X24], X8 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+ st4 { V2.16B, V3.16B, V4.16B, V5.16B }, [X21], X21 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+ st4 { V11.4H, V12.4H, V13.4H, V14.4H }, [X29], X3 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+ st4 { V16.8H, V17.8H, V18.8H, V19.8H }, [X13], X3 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+ st4 { V13.2S, V14.2S, V15.2S, V16.2S }, [X0], X0 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+ st4 { V26.4S, V27.4S, V28.4S, V29.4S }, [X1], X22 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+ st4 { V18.2D, V19.2D, V20.2D, V21.2D }, [X10], X28 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
+ st4 { V10.B, V11.B, V12.B, V13.B }[3], [X5] // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+ st4 { V5.H, V6.H, V7.H, V8.H }[4], [X13] // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+ st4 { V22.S, V23.S, V24.S, V25.S }[0], [X7] // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, S \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+ st4 { V23.D, V24.D, V25.D, V26.D }[1], [X5] // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+ st4 { V22.B, V23.B, V24.B, V25.B }[0], [X29], #4 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V6.B, V7.B, V8.B, V9.B }[9], [X26], X21 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V19.H, V20.H, V21.H, V22.H }[2], [X18], #8 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V6.H, V7.H, V8.H, V9.H }[4], [X9], X9 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V19.S, V20.S, V21.S, V22.S }[2], [X27], #16 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V22.S, V23.S, V24.S, V25.S }[0], [X29], X21 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V10.D, V11.D, V12.D, V13.D }[0], [X16], #32 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4 { V10.D, V11.D, V12.D, V13.D }[0], [X12], X11 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+ st4b { Z22.B, Z23.B, Z24.B, Z25.B }, P0, [X0] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4b { Z1.B, Z2.B, Z3.B, Z4.B }, P7, [X1, #20, MUL VL] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4b { Z28.B, Z29.B, Z30.B, Z31.B }, P4, [X27, X20] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+ st4d { Z19.D, Z20.D, Z21.D, Z22.D }, P1, [X11] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4d { Z0.D, Z1.D, Z2.D, Z3.D }, P6, [X7, #-24, MUL VL] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4d { Z28.D, Z29.D, Z30.D, Z31.D }, P5, [X19, X20, LSL #3] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+ st4h { Z14.H, Z15.H, Z16.H, Z17.H }, P1, [X24] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4h { Z27.H, Z28.H, Z29.H, Z30.H }, P3, [X26, #16, MUL VL] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4h { Z2.H, Z3.H, Z4.H, Z5.H }, P5, [X30, X17, LSL #1] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+ st4w { Z3.S, Z4.S, Z5.S, Z6.S }, P0, [X0] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4w { Z5.S, Z6.S, Z7.S, Z8.S }, P2, [X0, #-20, MUL VL] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+ st4w { Z21.S, Z22.S, Z23.S, Z24.S }, P5, [X5, X18, LSL #2] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+ stlrb W19, [X26] // STLRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlrb W9, [X19, #0] // STLRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlrh W4, [X7] // STLRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlrh W20, [X5, #0] // STLRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlur W3, [X27] // STLUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlur W0, [X15, #-14] // STLUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlur X23, [X25] // STLUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlur X18, [X6, #101] // STLUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlurb W30, [X17] // STLURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlurb W25, [X21, #-8] // STLURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlurh W9, [X29] // STLURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlurh W6, [X27, #-224] // STLURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+ stlxp W26, W11, W12, [X7] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxp W24, W10, W16, [X8, #0] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxp W1, X25, X26, [X10] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxp W10, X7, X20, [X22, #0] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxr W23, W8, [X6] // STLXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxr W29, W28, [X26, #0] // STLXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxr W23, X8, [X7] // STLXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxr W14, X18, [X23, #0] // STLXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxrb W2, W7, [X10] // STLXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxrb W0, W1, [X20, #0] // STLXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stlxrh W16, W17, [X21] // STLXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stlxrh W12, W26, [X23, #0] // STLXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stnp S29, S16, [X11] // STNP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp S17, S19, [X27, #-40] // STNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp D4, D3, [X30] // STNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp D25, D31, [X28, #328] // STNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp Q28, Q22, [X3] // STNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp Q17, Q15, [X16, #656] // STNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stnp W29, W25, [X5] // STNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnp W16, W18, [X27, #-232] // STNP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnp X20, X16, [X8] // STNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnp X6, X20, [X15, #-120] // STNP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stnt1b { Z18.B }, P7, [X21] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1b { Z9.B }, P6, [X26, #-7, MUL VL] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1b { Z18.B }, P1, [X1, X20] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1d { Z16.D }, P3, [X3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1d { Z27.D }, P4, [X16, #-6, MUL VL] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1d { Z11.D }, P0, [X18, X22, LSL #3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1h { Z27.H }, P5, [X16] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1h { Z2.H }, P2, [X30, #-8, MUL VL] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1h { Z0.H }, P1, [X7, X1, LSL #1] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+ stnt1w { Z9.S }, P3, [X20] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1w { Z12.S }, P4, [X11, #-6, MUL VL] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stnt1w { Z28.S }, P6, [X6, X0, LSL #2] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ stp S10, S19, [X13], #76 // STP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Store vector pair, immed post-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp D19, D20, [X30], #-144 // STP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Store vector pair, immed post-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp Q3, Q17, [X14], #-976 // STP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Store vector pair, immed post-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
+ stp S19, S24, [X27, #-224]! // STP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Store vector pair, immed pre-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp D16, D21, [X28, #168]! // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Store vector pair, immed pre-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ stp Q10, Q31, [X0, #608]! // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Store vector pair, immed pre-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
+ stp S27, S11, [X30] // STP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp D30, D19, [X25] // STP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp Q25, Q3, [X27] // STP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp S29, S13, [X0, #-44] // STP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp D15, D12, [X20, #-72] // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp Q13, Q16, [X3, #320] // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stp W18, W8, [X6], #196 // STP <Wt1>, <Wt2>, [<Xn|SP>], #<imms> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp X10, X17, [X7], #-328 // STP <Xt1>, <Xt2>, [<Xn|SP>], #<immd> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp W4, W3, [X0, #-36]! // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp X14, X13, [X24, #-272]! // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ stp W27, W30, [X20] // STP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stp X3, X6, [X16] // STP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stp W9, W14, [X10, #-24] // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stp X27, X4, [X14, #-448] // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W14, [X2], #-72 // STR <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ str X28, [X14], #-130 // STR <Xt>, [<Xn|SP>], #<simm> \\ Store register, immed post-index \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W9, [X29, #-227]! // STR <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ str X13, [X5, #233]! // STR <Xt>, [<Xn|SP>, #<simm>]! \\ Store register, immed pre-index \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W2, [X30] // STR <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W28, [X2, #1796] // STR <Wt>, [<Xn|SP>, #<pimm32>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X22, [X29] // STR <Xt>, [<Xn|SP>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X2, [X10, #9472] // STR <Xt>, [<Xn|SP>, #<pimm64>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str B21, [X28], #-62 // STR <Bt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H13, [X10], #-194 // STR <Ht>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S14, [X8], #166 // STR <St>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D24, [X10], #134 // STR <Dt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q20, [X30], #-108 // STR <Qt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B9, [X24, #242]! // STR <Bt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H0, [X4, #-193]! // STR <Ht>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S19, [X23, #115]! // STR <St>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D20, [X2, #-30]! // STR <Dt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q24, [X20, #62]! // STR <Qt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B5, [X11] // STR <Bt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B20, [X23, #2409] // STR <Bt>, [<Xn|SP>, #<pimm8>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H23, [X15] // STR <Ht>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H24, [X6, #492] // STR <Ht>, [<Xn|SP>, #<pimm16>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S25, [X19] // STR <St>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S2, [X14, #984] // STR <St>, [<Xn|SP>, #<pimm32>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D15, [X2] // STR <Dt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D27, [X7, #25704] // STR <Dt>, [<Xn|SP>, #<pimm64>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q13, [X16] // STR <Qt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q4, [X7, #96] // STR <Qt>, [<Xn|SP>, #<pimm128>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str P4, [X5] // STR <Pt>, [<Xn|SP>] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+ str P3, [X21, #-78, MUL VL] // STR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+ str W14, [X9, X17] // STR <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X5, [X0, X22] // STR <Xt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W24, [X21, W29, UXTW] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X27, [X26, W24, UXTW] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W28, [X29, W29, SXTW] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X25, [X1, W24, SXTW] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W2, [X24, X12, SXTX] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X3, [X24, X27, SXTX] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W29, [X30, W30, UXTW #2] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X5, [X13, W8, UXTW #3] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W28, [X7, W24, SXTW #2] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X23, [X2, W26, SXTW #3] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W11, [X8, X30, SXTX #2] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X20, [X4, X2, SXTX #3] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str W8, [X11, X10, LSL #2] // STR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str X27, [X2, X11, LSL #3] // STR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ str B14, [X13, X25] // STR <Bt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B30, [X16, W26, UXTW] // STR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B20, [X19, W3, SXTW] // STR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str B13, [X29, X19, SXTX] // STR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H16, [X5, X24] // STR <Ht>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H15, [X15, W15, UXTW] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H3, [X6, W15, SXTW] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H2, [X1, X28, SXTX] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str H30, [X29, W30, UXTW #1] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str H10, [X21, W11, SXTW #1] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str H0, [X15, X9, SXTX #1] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str H13, [X0, X26, LSL #1] // STR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Store vector reg, register offset, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str S2, [X16, X17] // STR <St>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S20, [X24, W10, UXTW] // STR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S4, [X9, W14, SXTW] // STR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S3, [X23, X26, SXTX] // STR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S27, [X17, W9, UXTW #2] // STR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S0, [X11, W20, SXTW #2] // STR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S20, [X17, X14, SXTX #2] // STR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str S0, [X15, X28, LSL #2] // STR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D5, [X26, X6] // STR <Dt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D11, [X9, W5, UXTW] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D16, [X20, W8, SXTW] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D0, [X12, X9, SXTX] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D4, [X21, W25, UXTW #3] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D28, [X20, W4, SXTW #3] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D20, [X13, X23, SXTX #3] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str D31, [X19, X28, LSL #3] // STR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q13, [X24, X1] // STR <Qt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q1, [X25, W9, UXTW] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q25, [X20, W15, SXTW] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q25, [X0, X15, SXTX] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ str Q6, [X13, W0, UXTW #4] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Q27, [X4, W15, SXTW #4] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Q3, [X23, X0, SXTX #4] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Q27, [X1, X28, LSL #4] // STR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Store vector reg, register offset, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+ str Z3, [X0] // STR <Zt>, [<Xn|SP>] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ str Z8, [X6, #188, MUL VL] // STR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+ strb W23, [X11], #34 // STRB <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strb W5, [X19, #-175]! // STRB <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strb W18, [X30] // STRB <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W12, [X9, #2315] // STRB <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W5, [X26, W7, UXTW] // STRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W18, [X2, W28, SXTW] // STRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W21, [X21, X7, SXTX] // STRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strb W9, [X6, X21] // STRB <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W21, [X8], #192 // STRH <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strh W8, [X26, #-204]! // STRH <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+ strh W6, [X7] // STRH <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W0, [X19, #7514] // STRH <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W12, [X0, X11] // STRH <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W5, [X18, W8, UXTW] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W28, [X29, W0, SXTW] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W7, [X17, X0, SXTX] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ strh W7, [X2, W14, UXTW #1] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+ strh W7, [X16, W29, SXTW #1] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+ strh W5, [X1, X13, SXTX #1] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store register, register offset, extend, scale by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+ strh W14, [X28, X2, LSL #1] // STRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Store register, register offset, scaled by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+ sttr W17, [X20] // STTR <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttr W14, [X30, #-35] // STTR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttr X10, [X16] // STTR <Xt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttr X16, [X8, #-25] // STTR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrb W13, [X2] // STTRB <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrb W0, [X20, #-114] // STTRB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrh W26, [X11] // STTRH <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sttrh W11, [X30, #-78] // STTRH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur B29, [X8] // STUR <Bt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur B5, [X0, #80] // STUR <Bt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur H10, [X15] // STUR <Ht>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur H10, [X12, #-227] // STUR <Ht>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur S10, [X4] // STUR <St>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur S9, [X14, #21] // STUR <St>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur D1, [X28] // STUR <Dt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur D6, [X6, #188] // STUR <Dt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur Q6, [X16] // STUR <Qt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur Q5, [X13, #-253] // STUR <Qt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+ stur W29, [X27] // STUR <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur W14, [X2, #-34] // STUR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur X29, [X10] // STUR <Xt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stur X30, [X25, #127] // STUR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturb W21, [X5] // STURB <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturb W25, [X26, #-117] // STURB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturh W0, [X11] // STURH <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ sturh W7, [X10, #-209] // STURH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+ stxp W29, W24, W6, [X9] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxp W26, W19, W22, [X11, #0] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxp W30, X6, X3, [X1] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxp W7, X2, X10, [X25, #0] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxr W19, W21, [X9] // STXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxr W25, W1, [X24, #0] // STXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxr W25, X30, [X28] // STXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxr W30, X20, [X23, #0] // STXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxrb W0, W26, [X10] // STXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxrb W10, W16, [X25, #0] // STXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ stxrh W0, W20, [X8] // STXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+ stxrh W12, W14, [X1, #0] // STXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+ sub W13, WSP, W10 // SUB <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ sub W22, WSP, W13, UXTB // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ sub W18, WSP, W23, SXTB #1 // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ sub W13, WSP, W8, LSL #4 // SUB <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ sub X6, X8, X22 // SUB <Xd>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X16, X2, W19, UXTB // SUB <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X16, X3, W27, UXTB #2 // SUB <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+ sub X4, X13, X16, LSL #3 // SUB <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ sub WSP, WSP, #50 // SUB <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub WSP, WSP, #84, LSL #12 // SUB <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X18, X22, #36 // SUB <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub X17, X20, #184, LSL #0 // SUB <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+ sub Z18.B, Z18.B, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z22.S, Z22.S, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z15.H, Z15.H, #196, LSL #8 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub W0, W21, W2, LSL #4 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+ sub W22, W7, W13, LSL #19 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ sub W1, W18, W16, ASR #4 // SUB <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+ sub X27, X29, X16, LSL #1 // SUB <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+ sub X24, X10, X15, LSL #35 // SUB <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ sub X24, X19, X13, LSR #20 // SUB <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+ sub D18, D25, D0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sub V15.2S, V14.2S, V11.2S // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ sub Z18.H, P4/M, Z18.H, Z7.H // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z29.B, Z19.B, Z8.B // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subhn V7.4H, V10.4S, V13.4S // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ subhn2 V24.4S, V24.2D, V8.2D // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ subr Z13.B, Z13.B, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z17.S, Z17.S, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z15.D, Z15.D, #100, LSL #0 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z21.D, P7/M, Z21.D, Z24.D // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subs W25, WSP, W13 // SUBS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W10, WSP, W9, UXTH // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W20, WSP, W3, SXTH #2 // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W12, WSP, W27, LSL #4 // SUBS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs X16, X20, X21 // SUBS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs X15, X2, W11, UXTB // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs X13, X15, X14, SXTX #1 // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ subs X30, X1, X26, LSL #3 // SUBS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W25, WSP, #239 // SUBS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W13, WSP, #75, LSL #12 // SUBS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ subs X9, X3, #173 // SUBS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs X30, X25, #82, LSL #12 // SUBS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ subs W16, W27, W25 // SUBS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W0, W30, W27, LSL #4 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs W17, W27, W3, LSL #20 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ subs W27, W7, W27, ASR #5 // SUBS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ subs X21, X22, X17 // SUBS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs X18, X1, X5, LSL #0 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ subs X28, X26, X4, LSL #49 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ subs X26, X14, X30, LSR #35 // SUBS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ sudot V4.2S, V20.8B, V18.4B[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+ sudot Z5.S, Z30.B, Z3.B[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+ sunpkhi Z22.D, Z16.S // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ sunpklo Z10.H, Z0.B // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ suqadd B15, B21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ suqadd V26.16B, V27.16B // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ svc #35275 // SVC #<imm> \\ No description \\ No scheduling info
+ sxtb W7, W20 // SXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxtb X18, W14 // SXTB <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxtb Z16.H, P5/M, Z15.H // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxth Z4.S, P7/M, Z11.S // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxtw Z12.D, P1/M, Z16.D // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxth W23, W2 // SXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxth X22, W17 // SXTH <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sxtl V4.8H, V21.8B // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sxtl2 V20.2D, V30.4S // SXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ sxtw X18, W22 // SXTW <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ sys #6, C6, C0, #3 // SYS #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+ sys #7, C12, C5, #3, X8 // SYS #<op1>, <Cn>, <Cm>, #<op2>, <Xt> \\ No description \\ No scheduling info
+ sysl X16, #5, C11, C8, #5 // SYSL <Xt>, #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+ tbl V7.8B, { V2.16B, V3.16B }, V17.8B // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
+ tbl V3.16B, { V10.16B, V11.16B, V12.16B }, V29.16B // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 3 table regs \\ 1 4 4 1.0 V1UnitV01[2]
+ tbl V9.8B, { V22.16B, V23.16B, V24.16B, V25.16B }, V14.8B // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 4 table regs \\ 1 4 4 0.67 V1UnitV01[3]
+ tbl V29.16B, { V3.16B }, V17.16B // TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
+ tbnz W3, #28, test // TBNZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tbnz X30, #48, test // TBNZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tbx V25.8B, { V13.16B, V14.16B }, V30.8B // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 2 table reg \\ 1 4 4 1.0 V1UnitV01[2]
+ tbx V22.16B, { V3.16B, V4.16B, V5.16B }, V25.16B // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 3 table reg \\ 1 6 6 0.67 V1UnitV01[3]
+ tbx V23.16B, { V0.16B, V1.16B, V2.16B, V3.16B }, V26.16B // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 4 table reg \\ 1 6 6 0.4 V1UnitV01[5]
+ tbx V16.8B, { V21.16B }, V18.8B // TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 1 table reg \\ 1 2 2 2.0 V1UnitV01
+ tbz W17, #16, test // TBZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tbz X22, #41, test // TBZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+ tlbi VMALLE1 // TLBI <tlbi_op> \\ No description \\ No scheduling info
+ tlbi IPAS2E1IS, X7 // TLBI <tlbi_op2>, <Xt> \\ No description \\ No scheduling info
+ trn1 V30.2S, V21.2S, V25.2S // TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+ trn1 P1.S, P4.S, P0.S // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+ trn2 P0.H, P5.H, P7.H // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+ trn2 V27.2D, V29.2D, V10.2D // TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+ tst W25, #0xe00 // TST <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ tst X3, #0x1e00 // TST <Xn>, #<immd> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ tst W9, W14 // TST <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ tst W10, W3, ASR #16 // TST <Wn>, <Wm>, <shift> #<wamount> \\ Test/Compare, shift by immed \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ tst X11, X28 // TST <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+ tst X9, X7, ASR #33 // TST <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+ uaba V13.16B, V14.16B, V19.16B // UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+ uabal V13.2D, V16.2S, V11.2S // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ uabal2 V17.4S, V0.8H, V1.8H // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+ uabd V23.4S, V4.4S, V30.4S // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+ uabd Z5.B, P5/M, Z5.B, Z10.B // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uabdl V13.4S, V26.4H, V7.4H // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ uabdl2 V15.2D, V9.4S, V10.4S // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+ uadalp V31.1D, V14.2S // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+ uaddl V29.8H, V8.8B, V31.8B // UADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uaddl2 V15.4S, V22.8H, V14.8H // UADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uaddlp V15.1D, V5.2S // UADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+ uaddlv H24, V24.8B // UADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ uaddlv H19, V31.16B // UADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ uaddlv S12, V24.4H // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uaddlv S30, V0.8H // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ uaddlv D6, V19.4S // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uaddv D9, P5, Z1.B // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+ uaddv D26, P0, Z25.H // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+ uaddv D4, P1, Z1.S // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ uaddv D28, P6, Z6.D // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ uaddw V17.2D, V9.2D, V12.2S // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uaddw2 V15.4S, V13.4S, V4.8H // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ ubfiz W11, W6, #30, #1 // UBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+ ubfiz X27, X15, #49, #9 // UBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+ ubfm W19, W16, #25, #24 // UBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ ubfm X4, X30, #59, #50 // UBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+ ubfx W13, W18, #25, #3 // UBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ ubfx X23, X26, #59, #5 // UBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+ ucvtf H8, W24, #16 // UCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S7, W16, #29 // UCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D5, W17, #23 // UCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H13, X17, #12 // UCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S25, X2, #37 // UCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D20, X11, #43 // UCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H30, W4 // UCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S22, W8 // UCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D8, W15 // UCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H17, X12 // UCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf S8, X0 // UCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf D22, X17 // UCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+ ucvtf H22, H16, #11 // UCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf S17, S18, #18 // UCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf D19, D1, #2 // UCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V18.4H, V11.4H, #7 // UCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ ucvtf V22.8H, V20.8H, #10 // UCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ ucvtf V16.2S, V17.2S, #11 // UCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V17.4S, V23.4S, #2 // UCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ ucvtf V18.2D, V20.2D, #60 // UCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf H7, H21 // UCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf S25, S7 // UCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf D30, D29 // UCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V9.4H, V25.4H // UCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ ucvtf V24.8H, V31.8H // UCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+ ucvtf V14.2S, V2.2S // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf V20.4S, V0.4S // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+ ucvtf V27.2D, V3.2D // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+ ucvtf Z31.H, P5/M, Z30.H // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
+ ucvtf Z23.H, P7/M, Z9.S // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+ ucvtf Z1.S, P1/M, Z10.S // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+ ucvtf Z24.D, P5/M, Z9.S // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+ ucvtf Z30.H, P2/M, Z24.D // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ ucvtf Z9.S, P5/M, Z9.D // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ ucvtf Z18.D, P6/M, Z19.D // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ udiv W12, W17, W22 // UDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
+ udiv X7, X2, X23 // UDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
+ udiv Z30.S, P5/M, Z30.S, Z10.S // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+ udiv Z31.D, P5/M, Z31.D, Z29.D // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+ udivr Z19.S, P4/M, Z19.S, Z8.S // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+ udivr Z3.D, P5/M, Z3.D, Z8.D // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+ udot Z0.S, Z5.B, Z4.B[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ udot Z19.D, Z1.H, Z13.H[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ udot Z22.S, Z29.B, Z4.B // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+ udot Z9.D, Z1.H, Z11.H // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ udot V10.2S, V11.8B, V21.4B[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ udot V7.4S, V21.16B, V6.4B[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ udot V19.2S, V31.8B, V17.8B // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+ uhadd V10.8H, V7.8H, V7.8H // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uhsub V12.4H, V16.4H, V28.4H // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ umaddl X9, W28, W9, X19 // UMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ umax Z8.B, Z8.B, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umax Z27.B, P1/M, Z27.B, Z13.B // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umax V7.16B, V11.16B, V7.16B // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ umaxp V15.8H, V8.8H, V12.8H // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ umaxv B19, V7.8B // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ umaxv B12, V10.16B // UMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ umaxv H27, V5.4H // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ umaxv H11, V22.8H // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ umaxv S5, V25.4S // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ umaxv B9, P7, Z19.B // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+ umaxv H8, P7, Z26.H // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+ umaxv S15, P2, Z28.S // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ umaxv D11, P4, Z11.D // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ umin Z21.S, Z21.S, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umin Z31.S, P2/M, Z31.S, Z4.S // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umin V0.16B, V26.16B, V2.16B // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ uminp V28.4S, V16.4S, V15.4S // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+ uminv B23, V21.8B // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ uminv B3, V10.16B // UMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+ uminv H6, V22.4H // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uminv H23, V3.8H // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+ uminv S29, V19.4S // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+ uminv B2, P5, Z8.B // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+ uminv H28, P0, Z0.H // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+ uminv S10, P1, Z29.S // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ uminv D24, P5, Z29.D // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ umlal V22.4S, V14.4H, V0.H[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal V28.2D, V31.2S, V0.S[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal2 V31.4S, V7.8H, V15.H[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal2 V10.2D, V4.4S, V3.S[2] // UMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal V29.4S, V20.4H, V30.4H // UMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlal2 V10.2D, V28.4S, V19.4S // UMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl V21.4S, V12.4H, V7.H[5] // UMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl V20.2D, V20.2S, V2.S[0] // UMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl2 V27.4S, V28.8H, V6.H[4] // UMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl2 V30.2D, V23.4S, V1.S[2] // UMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl V11.2D, V23.2S, V1.2S // UMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ umlsl2 V11.8H, V20.16B, V2.16B // UMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+ ummla V14.4S, V17.16B, V25.16B // UMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+ umnegl X23, W5, W23 // UMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ umov W6, V22.B[0] // UMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov W29, V0.B[11] // UMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov W10, V25.H[0] // UMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov W6, V7.H[3] // UMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov W8, V8.S[0] // UMOV <Wd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov W20, V1.S[3] // UMOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov X20, V11.D[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umov X29, V7.D[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+ umsubl X21, W16, W28, X6 // UMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+ umulh Z20.B, P4/M, Z20.B, Z6.B // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ umulh Z30.H, P6/M, Z30.H, Z15.H // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ umulh Z11.S, P7/M, Z11.S, Z8.S // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+ umulh Z3.D, P3/M, Z3.D, Z2.D // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ umulh X23, X22, X19 // UMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+ umull X5, W17, W23 // UMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+ umull V27.4S, V1.4H, V8.H[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull V22.2D, V28.2S, V6.S[1] // UMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull2 V18.4S, V26.8H, V10.H[1] // UMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull2 V28.2D, V21.4S, V1.S[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull V23.4S, V26.4H, V19.4H // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ umull2 V11.8H, V29.16B, V29.16B // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+ uqadd Z18.B, Z18.B, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z2.S, Z2.S, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z24.S, Z24.S, #56, LSL #0 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z6.H, Z28.H, Z5.H // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd S0, S24, S30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqadd V14.2D, V22.2D, V20.2D // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqdecb W10 // UQDECB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb W8, VL3 // UQDECB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb W3, VL32, MUL #1 // UQDECB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb X8 // UQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb X3, VL5 // UQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecb X22, MUL3, MUL #2 // UQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd W11 // UQDECD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd W27, VL256 // UQDECD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd W6, VL32, MUL #10 // UQDECD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd X1 // UQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd X12, VL8 // UQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd X10, VL64, MUL #10 // UQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecd Z0.D // UQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecd Z8.D, VL3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecd Z27.D, VL16, MUL #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdech W30 // UQDECH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech W28, MUL3 // UQDECH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech W5, VL5, MUL #8 // UQDECH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech X2 // UQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech X15, VL7 // UQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech X17, VL256, MUL #10 // UQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdech Z5.H // UQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdech Z16.H, VL128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdech Z27.H, VL128, MUL #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecp W19, P5.H // UQDECP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqdecp X1, P1.B // UQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqdecp Z20.S, P0 // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ uqdecw W17 // UQDECW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw W11, VL256 // UQDECW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw W13, MUL4, MUL #13 // UQDECW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw X7 // UQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw X28, VL32 // UQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw X0, VL256, MUL #3 // UQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqdecw Z29.S // UQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecw Z22.S, VL2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecw Z20.S, VL2, MUL #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincb W2 // UQINCB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb W21, VL128 // UQINCB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb W0, ALL, MUL #13 // UQINCB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb X24 // UQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb X18, VL7 // UQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincb X13, VL256, MUL #13 // UQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd W23 // UQINCD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd W27, VL4 // UQINCD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd W7, VL32, MUL #16 // UQINCD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd X0 // UQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd X29, MUL4 // UQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd X20, POW2, MUL #3 // UQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincd Z29.D // UQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincd Z4.D, VL64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincd Z12.D, VL6, MUL #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqinch W4 // UQINCH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch W23, MUL3 // UQINCH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch W27, VL7, MUL #3 // UQINCH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch X8 // UQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch X13, MUL3 // UQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch X5, MUL4, MUL #9 // UQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqinch Z21.H // UQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqinch Z1.H, VL8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqinch Z7.H, VL7, MUL #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincp W4, P5.D // UQINCP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqincp X13, P5.D // UQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+ uqincp Z1.S, P0 // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ uqincw W13 // UQINCW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw W26, VL8 // UQINCW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw W3, VL16, MUL #13 // UQINCW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw X26 // UQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw X13, VL256 // UQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw X29, VL7, MUL #6 // UQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+ uqincw Z26.S // UQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincw Z31.S, VL5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincw Z12.S, VL7, MUL #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqrshl S17, S5, S8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshl V25.8B, V13.8B, V23.8B // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn B12, H9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn H1, S28, #2 // UQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn S1, D4, #12 // UQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn V17.8B, V24.8H, #4 // UQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn V29.4H, V25.4S, #10 // UQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn V16.2S, V0.2D, #10 // UQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn2 V5.16B, V28.8H, #6 // UQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn2 V28.8H, V22.4S, #15 // UQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqrshrn2 V20.4S, V13.2D, #4 // UQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl B16, B25, #3 // UQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl H22, H27, #3 // UQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl S9, S5, #2 // UQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl D25, D1, #30 // UQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V25.16B, V0.16B, #7 // UQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V1.4H, V12.4H, #15 // UQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V23.2S, V4.2S, #17 // UQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V28.2D, V23.2D, #48 // UQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl B22, B26, B2 // UQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshl V8.4H, V17.4H, V13.4H // UQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn B16, H27, #6 // UQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn H4, S2, #15 // UQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn S0, D15, #22 // UQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn V19.8B, V26.8H, #3 // UQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn V31.4H, V17.4S, #8 // UQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn V1.2S, V11.2D, #9 // UQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn2 V23.16B, V16.8H, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn2 V1.8H, V12.4S, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqshrn2 V30.4S, V29.2D, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ uqsub Z26.B, Z26.B, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z19.S, Z19.S, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z15.H, Z15.H, #104, LSL #8 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z25.D, Z13.D, Z19.D // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub S16, S21, S6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqsub V19.4S, V0.4S, V5.4S // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ uqxtn S3, D27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ uqxtn V26.2S, V5.2D // UQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ uqxtn2 V15.16B, V22.8H // UQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+ urecpe V10.2S, V8.2S // URECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+ urecpe V1.4S, V23.4S // URECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
+ urhadd V16.2S, V19.2S, V2.2S // URHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ urshl D24, D22, D29 // URSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ urshl V31.8B, V5.8B, V3.8B // URSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr D23, D19, #62 // URSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V23.16B, V14.16B, #2 // URSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V16.4H, V13.4H, #7 // URSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V10.4S, V10.4S, #21 // URSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ urshr V2.2D, V16.2D, #30 // URSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+ ursqrte V15.2S, V20.2S // URSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+ ursqrte V31.4S, V14.4S // URSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
+ ursra D24, D24, #48 // URSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V14.8B, V18.8B, #1 // URSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V9.4H, V9.4H, #16 // URSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V25.2S, V17.2S, #9 // URSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ ursra V17.2D, V16.2D, #61 // URSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usdot V0.2S, V18.8B, V10.4B[3] // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+ usdot Z5.S, Z25.B, Z2.B[1] // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+ usdot V17.2S, V0.8B, V29.8B // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+ usdot Z8.S, Z6.B, Z18.B // USDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+ ushl D7, D17, D3 // USHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ ushl V6.8B, V26.8B, V6.8B // USHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll V18.8H, V24.8B, #4 // USHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll V12.4S, V10.4H, #3 // USHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll V16.2D, V16.2S, #31 // USHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll2 V14.8H, V3.16B, #3 // USHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll2 V18.4S, V22.8H, #13 // USHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushll2 V31.2D, V12.4S, #11 // USHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr D23, D22, #58 // USHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V24.8B, V0.8B, #2 // USHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V21.8H, V31.8H, #11 // USHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V27.2S, V24.2S, #14 // USHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ ushr V0.2D, V27.2D, #48 // USHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ usmmla V25.4S, V10.16B, V11.16B // USMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+ usqadd H14, H13 // USQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ usqadd V18.2D, V23.2D // USQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+ usra D22, D24, #9 // USRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V16.16B, V5.16B, #5 // USRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V18.4H, V22.4H, #11 // USRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V13.2S, V12.2S, #24 // USRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usra V30.2D, V30.2D, #41 // USRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+ usubl V22.4S, V18.4H, V3.4H // USUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ usubl2 V12.8H, V23.16B, V15.16B // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ usubw V30.8H, V12.8H, V20.8B // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ usubw2 V2.4S, V0.4S, V30.8H // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+ uunpkhi Z26.D, Z26.S // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ uunpklo Z10.S, Z11.H // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ uxtb W2, W23 // UXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ uxtb Z1.D, P2/M, Z11.D // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxth Z6.S, P3/M, Z18.S // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxtw Z23.D, P4/M, Z3.D // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxth W7, W14 // UXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+ uxtl V1.4S, V22.4H // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ uxtl2 V14.8H, V3.16B // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+ uzp1 V9.2S, V29.2S, V20.2S // UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+ uzp1 P5.D, P3.D, P5.D // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ uzp2 P6.S, P0.S, P6.S // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ uzp2 V18.4S, V12.4S, V31.4S // UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+ wfe // WFE \\ No description \\ No scheduling info
+ wfi // WFI \\ No description \\ No scheduling info
+ whilele P6.H, X28, X30 // WHILELE <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+ whilelo P3.B, X9, X7 // WHILELO <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+ whilels P4.B, W4, W20 // WHILELS <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+ whilelt P7.S, X20, X6 // WHILELT <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+ wrffr P7.B // WRFFR <Pn>.B \\ Write to first fault register \\ 1 2 2 1.0 V1UnitM0
+ xtn V20.8B, V17.8H // XTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+ xtn2 V31.16B, V26.8H // XTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+ yield // YIELD \\ No description \\ No scheduling info
+ zip1 V21.2D, V4.2D, V11.2D // ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+ zip1 P0.D, P1.D, P4.D // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ zip2 P3.S, P5.S, P4.S // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+ zip2 V2.4S, V20.4S, V5.4S // ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+
+.Lfunc_end0:
+ .size test, .Lfunc_end0-test
+ .cfi_endproc
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 376600
+# CHECK-NEXT: Total Cycles: 292826
+# CHECK-NEXT: Total uOps: 532700
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 1.82
+# CHECK-NEXT: IPC: 1.29
+# CHECK-NEXT: Block RThroughput: 854.0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - V1UnitB:2
+# CHECK-NEXT: [1] - V1UnitD:2
+# CHECK-NEXT: [2] - V1UnitFlg:3
+# CHECK-NEXT: [3] - V1UnitI:4 V1UnitS, V1UnitS, V1UnitM0, V1UnitM1
+# CHECK-NEXT: [4] - V1UnitL:3 V1UnitL01, V1UnitL01, V1UnitL2
+# CHECK-NEXT: [5] - V1UnitL2:1
+# CHECK-NEXT: [6] - V1UnitL01:2
+# CHECK-NEXT: [7] - V1UnitM:2 V1UnitM0, V1UnitM1
+# CHECK-NEXT: [8] - V1UnitM0:1
+# CHECK-NEXT: [9] - V1UnitM1:1
+# CHECK-NEXT: [10] - V1UnitS:2
+# CHECK-NEXT: [11] - V1UnitV:4 V1UnitV0, V1UnitV1, V1UnitV2, V1UnitV3
+# CHECK-NEXT: [12] - V1UnitV0:1
+# CHECK-NEXT: [13] - V1UnitV1:1
+# CHECK-NEXT: [14] - V1UnitV2:1
+# CHECK-NEXT: [15] - V1UnitV3:1
+# CHECK-NEXT: [16] - V1UnitV01:2 V1UnitV0, V1UnitV1
+# CHECK-NEXT: [17] - V1UnitV02:2 V1UnitV0, V1UnitV2
+# CHECK-NEXT: [18] - V1UnitV13:2 V1UnitV1, V1UnitV3
+
+# CHECK: Scheduling Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: Bypass Latency
+# CHECK-NEXT: [4]: Throughput
+# CHECK-NEXT: [5]: Resources
+# CHECK-NEXT: [6]: LLVM OpcodeName
+# CHECK-NEXT: [7]: Instruction
+# CHECK-NEXT: [8]: Comment if any
+# CHECK-NEXT: [1] [2] [3] [4] [5] [6] [7] [8]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv1i64 | abs d15, d11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv2i32 | abs v25.2s, v25.2s // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ABS_ZPmZ_B | abs z26.b, p6/m, z27.b // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADCWr | adc w13, w6, w4 // ADC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADCXr | adc x8, x12, x10 // ADC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADCSWr | adcs w29, w7, w30 // ADCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADCSXr | adcs x11, x3, x5 // ADCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrx | add wsp, wsp, w10 // ADD <Wd|WSP>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrx | add wsp, wsp, w2, uxtb // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrx | add wsp, wsp, w13, uxth #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrx | add wsp, wsp, w13, lsl #4 // ADD <Wd|WSP>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x22, x2, x27 // ADD <Xd|SP>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrx | add x25, x9, w25, uxtb // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrx | add x4, x28, w3, uxtb #3 // ADD <Xd|SP>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x0, x28, x26, lsl #3 // ADD <Xd|SP>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | add wsp, wsp, #3765 // ADD <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | add wsp, wsp, #3547, lsl #12 // ADD <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXri | add x7, x30, #803 // ADD <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXri | add x7, x2, #319, lsl #12 // ADD <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZI_D | add z13.d, z13.d, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZI_D | add z16.d, z16.d, #59648 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrs | add w3, w2, w21, lsl #3 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrs | add w6, w21, w17, lsl #15 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrs | add w28, w30, w19, asr #30 // ADD <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x8, x3, x28, lsl #3 // ADD <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrs | add x12, x13, x0, lsl #44 // ADD <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrs | add x5, x20, x28, lsr #16 // ADD <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDv1i64 | add d0, d23, d21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDv4i32 | add v19.4s, v24.4s, v15.4s // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZPmZ_D | add z29.d, p5/m, z29.d, z29.d // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZZZ_H | add z10.h, z22.h, z13.h // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDHNv4i32_v4i16 | addhn v26.4h, v5.4s, v9.4s // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDHNv8i16_v16i8 | addhn2 v1.16b, v19.8h, v6.8h // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDPv2i64p | addp d1, v14.2d // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDPv2i32 | addp v7.2s, v1.2s, v2.2s // ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ADDPL_XXI | addpl x27, x6, #-6 // ADDPL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w17, wsp, w25 // ADDS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w6, wsp, w15, uxth // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w22, wsp, w30, uxtb #2 // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w12, wsp, w29, lsl #4 // ADDS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x14, x0, x10 // ADDS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrx | adds x13, x23, w8, uxtb // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrx | adds x4, x26, w28, uxtb #1 // ADDS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x10, x3, x29, lsl #2 // ADDS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWri | adds w23, wsp, #502 // ADDS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWri | adds w2, wsp, #2980, lsl #12 // ADDS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXri | adds x12, x4, #1345 // ADDS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXri | adds x25, x18, #3037, lsl #12 // ADDS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | adds w12, w13, w26 // ADDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | adds w0, w23, w20 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | adds w13, w16, w12, lsl #28 // ADDS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | adds w20, w19, w16, asr #0 // ADDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x23, x12, x4 // ADDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x0, x13, x4, lsl #2 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | adds x4, x7, x6, lsl #31 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | adds x9, x8, x9, asr #41 // ADDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | ADDVv8i8v | addv b0, v28.8b // ADDV B<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | ADDVv16i8v | addv b1, v26.16b // ADDV B<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | ADDVv4i16v | addv h18, v13.4h // ADDV H<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | ADDVv8i16v | addv h29, v17.8h // ADDV H<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | ADDVv4i32v | addv s22, v18.4s // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ADDVL_XXI | addvl x1, x27, #-8 // ADDVL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADR | adr x3, test // ADR <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_LSL_ZZZ_D_0 | adr z26.d, [z1.d, z8.d] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_LSL_ZZZ_S_2 | adr z22.s, [z28.s, z8.s, lsl #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_SXTW_ZZZ_D_0 | adr z11.d, [z2.d, z29.d, sxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_SXTW_ZZZ_D_2 | adr z3.d, [z9.d, z9.d, sxtw #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_UXTW_ZZZ_D_0 | adr z6.d, [z7.d, z13.d, uxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_UXTW_ZZZ_D_1 | adr z4.d, [z24.d, z22.d, uxtw #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADRP | adrp x0, test // ADRP <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWri | and wsp, w16, #0xe00 // AND <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXri | and x2, x22, #0x1e00 // AND <Xd|SP>, <Xn>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z1.b, z1.b, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.h, z7.h, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.s, z7.s, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.d, z7.d, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | AND_PPzPP | and p5.b, p1/z, p6.b, p4.b // AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWrs | and w11, w14, w24 // AND <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWrs | and w2, w21, w22, lsr #25 // AND <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXrs | and x1, x20, x29 // AND <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXrs | and x8, x11, x22, asr #56 // AND <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ANDv8i8 | and v29.8b, v26.8b, v26.8b // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZPmZ_D | and z17.d, p6/m, z17.d, z12.d // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZZZ | and z9.d, z5.d, z17.d // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWri | ands w14, w8, #0x70 // ANDS <Wd>, <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXri | ands x4, x10, #0x60 // ANDS <Xd>, <Xn>, #<immd> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWrs | ands w29, w28, w12 // ANDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | ands w7, w13, w23, asr #3 // ANDS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXrs | ands x21, x9, x6 // ANDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | ands x10, x27, x7, asr #20 // ANDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ANDS_PPzPP | ands p5.b, p1/z, p2.b, p7.b // ANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | ANDV_VPZ_H | andv h7, p6, z31.h // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | asr w30, w14, #5 // ASR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | asr x12, x21, #28 // ASR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_B | asr z7.b, p5/m, z7.b, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_H | asr z6.h, p6/m, z6.h, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_S | asr z28.s, p0/m, z28.s, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_D | asr z26.d, p5/m, z26.d, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_B | asr z10.b, z14.b, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_H | asr z23.h, z18.h, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_S | asr z29.s, z11.s, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_D | asr z20.d, z26.d, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVWr | asr w3, w0, w20 // ASR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVXr | asr x7, x5, x21 // ASR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmZ_S | asr z3.s, p0/m, z3.s, z10.s // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_WIDE_ZPmZ_S | asr z9.s, p2/m, z9.s, z8.d // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_WIDE_ZZZ_S | asr z26.s, z21.s, z21.d // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_B | asrd z6.b, p4/m, z6.b, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_H | asrd z19.h, p3/m, z19.h, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_S | asrd z16.s, p3/m, z16.s, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_D | asrd z9.d, p6/m, z9.d, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRR_ZPmZ_B | asrr z0.b, p0/m, z0.b, z19.b // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVWr | asr w24, w28, w13 // ASRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVXr | asr x3, x21, x24 // ASRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | at s12e1r, x28 // AT <at_op>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | B | b test // B <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.eq test // B.eq <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.eq test // B.none <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ne test // B.ne <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ne test // B.any <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hs test // B.cs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hs test // B.hs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hs test // B.nlast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lo test // B.cc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lo test // B.lo <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lo test // B.last <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.mi test // B.mi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.mi test // B.first <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.pl test // B.pl <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.pl test // B.nfrst <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.vs test // B.vs <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.vc test // B.vc <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hi test // B.hi <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.hi test // B.pmore <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ls test // B.ls <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ls test // B.plast <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ge test // B.ge <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.ge test // B.tcont <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lt test // B.lt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.lt test // B.tstop <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.gt test // B.gt <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.le test // B.le <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.al test // B.al <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.nv test // B.nv <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | BFCVT | bfcvt h6, s20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | BFCVT_ZPmZ | bfcvt z16.h, p6/m, z1.s // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | BFCVTN | bfcvtn v12.4h, v15.4s // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | BFCVTN2 | bfcvtn2 v15.8h, v13.4s // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | BFCVTNT_ZPmZ | bfcvtnt z11.h, p7/m, z24.s // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BF16DOTlanev4bf16 | bfdot v0.2s, v24.4h, v14.2h[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFDOT_ZZI | bfdot z24.s, z26.h, z2.h[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFDOTv8bf16 | bfdot v31.4s, v21.8h, v14.8h // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFDOT_ZZZ | bfdot z15.s, z3.h, z7.h // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfi w10, w26, #31, #1 // BFI <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfi x25, x7, #8, #1 // BFI <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfi w30, w26, #18, #13 // BFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfxil x15, x20, #0, #36 // BFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALB_ZZZI | bfmlalb z13.s, z30.h, z0.h[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALB_ZZZ | bfmlalb z3.s, z14.h, z13.h // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALBIdx | bfmlalb v22.4s, v11.8h, v11.h[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALTIdx | bfmlalt v17.4s, v4.8h, v11.h[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALB | bfmlalb v13.4s, v5.8h, v17.8h // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALT | bfmlalt v10.4s, v16.8h, v1.8h // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALT_ZZZI | bfmlalt z23.s, z3.h, z2.h[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALT_ZZZ | bfmlalt z25.s, z21.h, z22.h // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 3 | 4.00 | V1UnitV | BFMMLA | bfmmla v15.4s, v28.8h, v23.8h // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 5 | 3 | 2.00 | V1UnitV, V1UnitV01 | BFMMLA_ZZZ | bfmmla z26.s, z2.h, z12.h // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 1 5 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfxil w27, w23, #14, #14 // BFXIL <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfxil x0, x5, #11, #22 // BFXIL <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z28.b, z28.b, #0x8f // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z18.h, z18.h, #0xff9f // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z12.s, z12.s, #0xfffffffd // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z6.d, z6.d, #0xfffffffffffffffb // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BIC_PPzPP | bic p4.b, p4/z, p6.b, p0.b // BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICWrs | bic w0, w26, w22 // BIC <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICWrs | bic w23, w10, w7, lsl #11 // BIC <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICXrs | bic x21, x20, x14 // BIC <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICXrs | bic x21, x3, x17, lsr #35 // BIC <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv4i16 | bic v6.4h, #217 // BIC <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv8i16 | bic v23.8h, #101 // BIC <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv2i32 | bic v24.2s, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv2i32 | bic v31.2s, #192 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv16i8 | bic v25.16b, v10.16b, v9.16b // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | BIC_ZPmZ_D | bic z15.d, p4/m, z15.d, z25.d // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | BIC_ZZZ | bic z7.d, z8.d, z28.d // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | BICSWrs | bics w24, w1, w25 // BICS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSWrs | bics w21, w0, w24, lsl #11 // BICS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | BICSXrs | bics x27, x25, x10 // BICS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSXrs | bics x22, x6, x27, lsl #62 // BICS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BICS_PPzPP | bics p2.b, p4/z, p1.b, p7.b // BICS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BIFv8i8 | bif v0.8b, v25.8b, v4.8b // BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BITv8i8 | bit v5.8b, v12.8b, v22.8b // BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitB, V1UnitI, V1UnitS | BL | bl test // BL <label> \\ Branch and link, immed \\ 2 1 1 2.0 V1UnitB,V1UnitS
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitB, V1UnitI, V1UnitS | BLR | blr x11 // BLR <Xn> \\ Branch and link, register \\ 2 1 1 2.0 V1UnitB,V1UnitS
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | BR | br x17 // BR <Xn> \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | BRK | brk #0x8415 // BRK #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKA_PPzP | brka p7.b, p7/z, p5.b // BRKA <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKAS_PPzP | brkas p6.b, p5/z, p0.b // BRKAS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKB_PPzP | brkb p5.b, p0/z, p1.b // BRKB <Pd>.B, <Pg>/<ZM>, <Pn>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKBS_PPzP | brkbs p6.b, p1/z, p4.b // BRKBS <Pd>.B, <Pg>/Z, <Pn>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKN_PPzP | brkn p7.b, p0/z, p6.b, p7.b // BRKN <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKNS_PPzP | brkns p3.b, p1/z, p7.b, p3.b // BRKNS <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKPA_PPzPP | brkpa p3.b, p5/z, p0.b, p1.b // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKPAS_PPzPP | brkpas p2.b, p5/z, p1.b, p3.b // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKPB_PPzPP | brkpb p1.b, p0/z, p7.b, p6.b // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKPBS_PPzPP | brkpbs p7.b, p1/z, p6.b, p1.b // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BSLv16i8 | bsl v27.16b, v13.16b, v21.16b // BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBNZW | cbnz w21, test // CBNZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBNZX | cbnz x26, test // CBNZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBZW | cbz w6, test // CBZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBZX | cbz x4, test // CBZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNWi | ccmn w8, #14, #3, hs // CCMN <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNXi | ccmn x23, #17, #0, gt // CCMN <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNWr | ccmn w17, w18, #12, hs // CCMN <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMNXr | ccmn x19, x29, #12, lo // CCMN <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPWi | ccmp w24, #2, #5, hs // CCMP <Wn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPXi | ccmp x12, #8, #2, lo // CCMP <Xn>, #<imm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPWr | ccmp w2, w9, #3, lt // CCMP <Wn>, <Wm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CCMPXr | ccmp x11, x10, #13, ls // CCMP <Xn>, <Xm>, #<nzcv>, <cond> \\ Conditional compare, flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCWr | cinc w23, w5, lt // CINC <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | cinc x2, x1, pl // CINC <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | cinv w9, w12, ge // CINV <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | cinv x9, x30, mi // CINV <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_VPZ_B | clasta b11, p4, b11, z21.b // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_RPZ_B | clasta w8, p0, w8, z6.b // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_ZPZ_S | clasta z25.s, p1, z25.s, z14.s // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_VPZ_D | clastb d6, p7, d6, z31.d // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_RPZ_B | clastb w28, p6, w28, z12.b // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_ZPZ_H | clastb z27.h, p6, z27.h, z22.h // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | CLREX | clrex // CLREX \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | CLREX | clrex #12 // CLREX #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CLSv8i8 | cls v5.8b, v22.8b // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLSWr | cls w25, w0 // CLS <Wd>, <Wn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLSXr | cls x22, x6 // CLS <Xd>, <Xn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CLS_ZPmZ_D | cls z28.d, p3/m, z2.d // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CLZv8i16 | clz v24.8h, v30.8h // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLZWr | clz w26, w27 // CLZ <Wd>, <Wn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLZXr | clz x4, x0 // CLZ <Xd>, <Xn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CLZ_ZPmZ_S | clz z3.s, p3/m, z18.s // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv1i64 | cmeq d26, d5, d25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv8i16 | cmeq v9.8h, v16.8h, v24.8h // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv1i64rz | cmeq d7, d26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv4i16rz | cmeq v14.4h, v18.4h, #0 // CMEQ <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv1i64 | cmge d26, d21, d28 // CMGE <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv8i16 | cmge v22.8h, v16.8h, v3.8h // CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv1i64rz | cmge d30, d12, #0 // CMGE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv16i8rz | cmge v22.16b, v30.16b, #0 // CMGE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv1i64 | cmgt d23, d25, d12 // CMGT <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv2i64 | cmgt v3.2d, v29.2d, v11.2d // CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv1i64rz | cmgt d28, d14, #0 // CMGT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv2i32rz | cmgt v22.2s, v10.2s, #0 // CMGT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHIv1i64 | cmhi d29, d16, d5 // CMHI <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHIv4i16 | cmhi v28.4h, v25.4h, v21.4h // CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHSv1i64 | cmhs d5, d3, d12 // CMHS <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHSv8i8 | cmhs v6.8b, v31.8b, v12.8b // CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLEv1i64rz | cmle d14, d21, #0 // CMLE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLEv2i32rz | cmle v21.2s, v19.2s, #0 // CMLE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLTv1i64rz | cmlt d21, d24, #0 // CMLT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLTv4i16rz | cmlt v26.4h, v12.4h, #0 // CMLT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w7 // CMN <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w8, sxtb // CMN <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w3, uxtb #3 // CMN <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w7, lsl #3 // CMN <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x2, x28 // CMN <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrx | cmn x3, w0, uxtb // CMN <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrx | cmn x0, w4, uxtb #3 // CMN <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x14, x26, lsl #2 // CMN <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWri | cmn wsp, #613 // CMN <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWri | cmn wsp, #2991, lsl #12 // CMN <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXri | cmn x23, #3803 // CMN <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXri | cmn x29, #3786, lsl #12 // CMN <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | cmn w12, w0 // CMN <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrs | cmn w19, w27, lsl #1 // CMN <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | cmn w2, w11, lsl #29 // CMN <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSWrs | cmn w0, w0, asr #30 // CMN <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x23, x28 // CMN <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | cmn x6, x1, lsl #2 // CMN <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | cmn x28, x30, lsl #26 // CMN <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | cmn x25, x15, lsr #49 // CMN <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | cmp wsp, w26 // CMP <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | cmp wsp, w13, sxth // CMP <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | cmp wsp, w12, sxth #3 // CMP <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | cmp wsp, w30, lsl #4 // CMP <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x22, x18 // CMP <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrx | cmp x16, w27, uxtb // CMP <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrx | cmp x16, w7, uxtb #4 // CMP <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x21, x24, lsl #4 // CMP <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWri | cmp wsp, #2342 // CMP <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWri | cmp wsp, #3664, lsl #12 // CMP <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXri | cmp x5, #1482 // CMP <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXri | cmp x4, #3684, lsl #12 // CMP <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | cmp w14, w0, lsl #4 // CMP <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | cmp w0, w23, lsl #29 // CMP <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | cmp w2, w28, lsr #20 // CMP <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x27, x10, lsl #1 // CMP <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | cmp x18, x12, lsl #14 // CMP <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | cmp x6, x7, lsr #0 // CMP <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZI_H | cmpeq p2.h, p0/z, z26.h, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZI_B | cmpge p1.b, p4/z, z28.b, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZI_B | cmpgt p1.b, p0/z, z13.b, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZI_D | cmphi p1.d, p3/z, z23.d, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZI_D | cmphs p7.d, p5/z, z23.d, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLE_PPzZI_B | cmple p5.b, p2/z, z9.b, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLO_PPzZI_S | cmplo p3.s, p5/z, z18.s, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLS_PPzZI_D | cmpls p6.d, p6/z, z31.d, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLT_PPzZI_H | cmplt p0.h, p6/z, z29.h, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_PPzZI_S | cmpne p5.s, p4/z, z18.s, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZZ_S | cmpeq p6.s, p5/z, z2.s, z9.s // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZZ_S | cmpge p7.s, p4/z, z15.s, z15.s // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_H | cmpgt p2.h, p4/z, z26.h, z11.h // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_S | cmphi p0.s, p4/z, z8.s, z4.s // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZZ_D | cmphs p1.d, p6/z, z26.d, z15.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_PPzZZ_B | cmpne p4.b, p3/z, z21.b, z16.b // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZZ_D | cmpeq p2.d, p3/z, z13.d, z18.d // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_WIDE_PPzZZ_B | cmpge p2.b, p3/z, z3.b, z16.d // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_WIDE_PPzZZ_H | cmpgt p2.h, p2/z, z28.h, z30.d // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_WIDE_PPzZZ_H | cmphi p0.h, p5/z, z30.h, z16.d // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_WIDE_PPzZZ_H | cmphs p7.h, p2/z, z1.h, z26.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLE_WIDE_PPzZZ_B | cmple p7.b, p7/z, z3.b, z13.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_D | cmphi p6.d, p2/z, z16.d, z16.d // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLS_WIDE_PPzZZ_H | cmpls p3.h, p2/z, z12.h, z26.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_D | cmpgt p0.d, p4/z, z26.d, z29.d // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_WIDE_PPzZZ_S | cmpne p0.s, p4/z, z30.s, z8.d // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZZ_D | cmpge p1.d, p3/z, z26.d, z2.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_B | cmphi p7.b, p0/z, z25.b, z4.b // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZZ_D | cmphs p4.d, p4/z, z14.d, z2.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_S | cmpgt p2.s, p2/z, z21.s, z31.s // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMTSTv1i64 | cmtst d10, d6, d5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMTSTv2i64 | cmtst v13.2d, v13.2d, v13.2d // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGWr | cneg w3, w17, hi // CNEG <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGXr | cneg x26, x8, lo // CNEG <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CNOT_ZPmZ_S | cnot z7.s, p7/m, z8.s // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CNTv16i8 | cnt v12.16b, v14.16b // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CNT_ZPmZ_H | cnt z26.h, p0/m, z27.h // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x18 // CNTB <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x9, vl128 // CNTB <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x28, vl8, mul #13 // CNTB <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTD_XPiI | cntd x20 // CNTD <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTD_XPiI | cntd x27, vl7 // CNTD <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTD_XPiI | cntd x8, vl7, mul #2 // CNTD <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTH_XPiI | cnth x27 // CNTH <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTH_XPiI | cnth x0, vl1 // CNTH <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTH_XPiI | cnth x16, vl3, mul #6 // CNTH <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x22 // CNTW <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x23, vl3 // CNTW <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x6, vl16, mul #11 // CNTW <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTP_XPP_S | cntp x22, p1, p2.s // CNTP <Xd>, <Pg>, <Pn>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | COMPACT_ZPZ_S | compact z17.s, p1, z18.s // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmV_B | mov z13.b, p0/m, b6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_B | mov z3.b, p6/m, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_S | mov z11.s, p5/m, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_H | mov z0.h, p0/m, #-11 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_B | mov z5.b, p1/z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_H | mov z12.h, p1/z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z25.d, p3/z, #-20736 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_H | mov z24.h, p0/m, w19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_S | mov z23.s, p2/m, wsp // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Brr | crc32b w27, w12, w15 // CRC32B <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Hrr | crc32h w3, w15, w21 // CRC32H <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Wrr | crc32w w9, w18, w24 // CRC32W <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Xrr | crc32x w19, w6, x25 // CRC32X <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CBrr | crc32cb w25, w28, w30 // CRC32CB <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CHrr | crc32ch w25, w26, w16 // CRC32CH <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CWrr | crc32cw w27, w12, w23 // CRC32CW <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32CXrr | crc32cx w21, w28, x5 // CRC32CX <Wd>, <Wn>, <Xm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | csdb // CSDB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSELWr | csel w25, w16, w30, ls // CSEL <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSELXr | csel x28, x1, x2, pl // CSEL <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCWr | cset w6, ne // CSET <Wd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | cset x11, lt // CSET <Xd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | csetm w3, hi // CSETM <Wd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | csetm x6, ne // CSETM <Xd>, <cond> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCWr | csinc w9, w3, w14, lt // CSINC <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | csinc x20, x11, x23, ge // CSINC <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | csinv w1, w4, w3, hs // CSINV <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | csinv x27, x21, x15, ne // CSINV <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGWr | csneg w5, w13, w4, hi // CSNEG <Wd>, <Wn>, <Wm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGXr | cneg x8, x29, ls // CSNEG <Xd>, <Xn>, <Xm>, <cond> \\ Conditional select \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CTERMEQ_XX | ctermeq x4, x11 // CTERMEQ <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CTERMNE_XX | ctermne x0, x16 // CTERMNE <R><n>, <R><m> \\ Loop terminate \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | dc csw, x16 // DC <dc_op>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS1 | dcps1 // DCPS1 \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS1 | dcps1 #0x1127 // DCPS1 #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS2 | dcps2 // DCPS2 \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS2 | dcps2 #0x6884 // DCPS2 #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS3 | dcps3 // DCPS3 \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DCPS3 | dcps3 #0xb8e2 // DCPS3 #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECB_XPiI | decb x22 // DECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECB_XPiI | decb x5, vl256 // DECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECB_XPiI | decb x21, vl256, mul #7 // DECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECD_XPiI | decd x11 // DECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECD_XPiI | decd x19 // DECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECD_XPiI | decd x24, vl2, mul #10 // DECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECH_XPiI | dech x16 // DECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECH_XPiI | dech x20, mul4 // DECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECH_XPiI | dech x0, mul3, mul #15 // DECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x27 // DECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x18, vl32 // DECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x29, vl6, mul #3 // DECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECD_ZPiI | decd z19.d // DECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECD_ZPiI | decd z22.d, mul3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECD_ZPiI | decd z1.d, vl128, mul #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECH_ZPiI | dech z23.h // DECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECH_ZPiI | dech z29.h, vl5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECH_ZPiI | dech z28.h, vl64, mul #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECW_ZPiI | decw z8.s // DECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECW_ZPiI | decw z4.s, vl64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECW_ZPiI | decw z27.s, vl4, mul #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECP_XP_B | decp x6, p6.b // DECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | DECP_ZP_H | decp z22.h, p1.h // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DMB | dmb sy // DMB <option> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DMB | dmb nshst // DMB #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | DRPS | drps // DRPS \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi8 | mov b15, v25.b[12] // DUP B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi16 | mov h2, v31.h[5] // DUP H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi32 | mov s10, v2.s[1] // DUP S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi64 | mov d24, v7.d[1] // DUP D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv8i8lane | dup v25.8b, v21.b[4] // DUP <Vd>.<Tb>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv8i16lane | dup v28.8h, v29.h[1] // DUP <Vd>.<Th>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv4i32lane | dup v24.4s, v9.s[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv2i64lane | dup v20.2d, v3.d[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUPv4i32gpr | dup v19.4s, w27 // DUP <Vd>.<T>, <R><n> \\ ASIMD duplicate, gen reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z30.b, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z15.h, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_D | mov z22.d, #-14 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_B | mov z2.b, z26.b[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_H | mov z23.h, z22.h[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z29.s, z30.s[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_D | mov z4.d, d7 // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_D | mov z25.d, x28 // DUP <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_S | mov z18.s, wsp // DUP <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z18.b, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z12.h, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z16.s, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z16.d, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONWrs | eon w29, w4, w19 // EON <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONWrs | eon w14, w24, w28, asr #14 // EON <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONXrs | eon x19, x12, x2 // EON <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONXrs | eon x23, x23, x23, asr #41 // EON <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z7.b, z7.b, #0x8f // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z3.h, z3.h, #0xff9f // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z2.s, z2.s, #0xfffffffd // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z24.d, z24.d, #0xfffffffffffffffb // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWri | eor wsp, w4, #0xe00 // EOR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXri | eor x27, x25, #0x1e00 // EOR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z19.b, z19.b, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z18.h, z18.h, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z10.s, z10.s, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z29.d, z29.d, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | EOR_PPzPP | eor p6.b, p7/z, p3.b, p5.b // EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWrs | eor w8, w27, w2 // EOR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWrs | eor w8, w7, w29, asr #30 // EOR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXrs | eor x22, x16, x6 // EOR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXrs | eor x0, x23, x30, lsl #11 // EOR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EORv16i8 | eor v8.16b, v10.16b, v19.16b // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZPmZ_H | eor z8.h, p3/m, z8.h, z14.h // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZZZ | eor z30.d, z26.d, z20.d // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | EORS_PPzPP | eors p1.b, p0/z, p3.b, p1.b // EORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | EORV_VPZ_H | eorv h17, p1, z15.h // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | ERET | eret // ERET \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | esb // ESB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EXTv8i8 | ext v12.8b, v22.8b, v31.8b, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EXTv16i8 | ext v17.16b, v18.16b, v8.16b, #10 // EXT <Vd>.16B, <Vn>.16B, <Vm>.16B, #<index16> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRWrri | ror w19, w20, #16 // EXTR <Wd>, <Wn>, <Wn>, #<lsbs> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | EXTRWrri | extr w27, w4, w5, #23 // EXTR <Wd>, <Wn>, <Wm>, #<lsbs> \\ Bitfield extract, two regs \\ 1 3 3 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRXrri | ror x25, x22, #62 // EXTR <Xd>, <Xn>, <Xn>, #<lsbd> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | EXTRXrri | extr x0, x12, x13, #17 // EXTR <Xd>, <Xn>, <Xm>, #<lsbd> \\ Bitfield extract, two regs \\ 1 3 3 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABD16 | fabd h27, h20, h17 // FABD <Hd>, <Hn>, <Hm> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABD32 | fabd s16, s29, s6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABDv8f16 | fabd v13.8h, v28.8h, v12.8h // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABDv4f32 | fabd v12.4s, v4.4s, v31.4s // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FABD_ZPmZ_H | fabd z11.h, p6/m, z11.h, z5.h // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSHr | fabs h25, h7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSSr | fabs s17, s12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSDr | fabs d30, d8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSv4f32 | fabs v16.4s, v31.4s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSv2f32 | fabs v17.2s, v28.2s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FABS_ZPmZ_S | fabs z26.s, p7/m, z24.s // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGE_PPzZZ_H | facge p0.h, p5/z, z15.h, z18.h // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGT_PPzZZ_S | facgt p7.s, p7/z, z10.s, z4.s // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGE16 | facge h24, h26, h29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGE64 | facge d25, d24, d7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGEv4f16 | facge v25.4h, v16.4h, v11.4h // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGEv2f32 | facge v19.2s, v24.2s, v5.2s // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGT16 | facgt h0, h4, h10 // FACGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGT32 | facgt s29, s3, s2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGTv8f16 | facgt v22.8h, v14.8h, v31.8h // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGTv4f32 | facgt v22.4s, v8.4s, v2.4s // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGE_PPzZZ_H | facge p7.h, p5/z, z27.h, z22.h // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGT_PPzZZ_H | facgt p5.h, p5/z, z16.h, z31.h // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZPmI_H | fadd z4.h, p7/m, z4.h, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDHrr | fadd h23, h27, h22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDSrr | fadd s1, s23, s27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDDrr | fadd d16, d15, d21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDv2f64 | fadd v7.2d, v30.2d, v20.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDv2f64 | fadd v16.2d, v13.2d, v11.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZPmZ_H | fadd z26.h, p4/m, z26.h, z1.h // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZZZ_S | fadd z23.s, z7.s, z16.s // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 19 | 19 | 0.06 | V1UnitV[18], V1UnitV0[18], V1UnitV01[18], V1UnitV02[18] | FADDA_VPZ_H | fadda h8, p3, h8, z28.h // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 1 19 19 0.06 V1UnitV0[18]
+# CHECK-NEXT: 1 | 11 | 11 | 0.10 | V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10] | FADDA_VPZ_S | fadda s11, p6, s11, z1.s // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 1 11 11 0.1 V1UnitV0[10]
+# CHECK-NEXT: 1 | 8 | 8 | 0.67 | V1UnitV[3], V1UnitV01[3] | FADDA_VPZ_D | fadda d27, p4, d27, z27.d // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 1 8 8 0.67 V1UnitV01[3]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2i16p | faddp h10, v19.2h // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2i64p | faddp d11, v28.2d // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2f64 | faddp v16.2d, v11.2d, v5.2d // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv4f32 | faddp v16.4s, v11.4s, v18.4s // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FADDV_VPZ_H | faddv h21, p2, z3.h // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FADDV_VPZ_S | faddv s16, p2, z25.s // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FADDV_VPZ_D | faddv d18, p4, z7.d // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FCADD_ZPmZ_H | fcadd z29.h, p2/m, z29.h, z15.h, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPHrr | fccmp h31, h3, #11, hs // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPSrr | fccmp s5, s6, #0, lo // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPDrr | fccmp d17, d15, #0, ne // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEHrr | fccmpe h6, h1, #12, ne // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPESrr | fccmpe s16, s13, #10, vs // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEDrr | fccmpe d17, d14, #15, ls // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMEQ_PPzZZ_D | fcmeq p7.d, p1/z, z23.d, z21.d // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZZ_H | fcmge p6.h, p1/z, z19.h, z10.h // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZZ_S | fcmgt p5.s, p2/z, z29.s, z5.s // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMNE_PPzZZ_D | fcmne p5.d, p0/z, z22.d, z15.d // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMUO_PPzZZ_D | fcmuo p0.d, p2/z, z15.d, z23.d // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMEQ_PPzZ0_D | fcmeq p4.d, p5/z, z19.d, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZ0_D | fcmge p0.d, p5/z, z10.d, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZ0_D | fcmgt p6.d, p1/z, z8.d, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMLE_PPzZ0_D | fcmle p2.d, p4/z, z26.d, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMLT_PPzZ0_D | fcmlt p5.d, p5/z, z23.d, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMNE_PPzZ0_H | fcmne p2.h, p3/z, z7.h, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQ16 | fcmeq h30, h6, h1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQ32 | fcmeq s17, s0, s21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2f32 | fcmeq v19.2s, v31.2s, v19.2s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv4f32 | fcmeq v12.4s, v11.4s, v26.4s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv1i16rz | fcmeq h19, h23, #0.0 // FCMEQ <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv1i32rz | fcmeq s25, s18, #0.0 // FCMEQ <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2i32rz | fcmeq v8.2s, v16.2s, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2i64rz | fcmeq v18.2d, v17.2d, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGE16 | fcmge h1, h16, h12 // FCMGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGE64 | fcmge d29, d9, d3 // FCMGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv8f16 | fcmge v20.8h, v19.8h, v22.8h // FCMGE <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv2f64 | fcmge v17.2d, v11.2d, v13.2d // FCMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv1i16rz | fcmge h10, h23, #0.0 // FCMGE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv1i64rz | fcmge d5, d17, #0.0 // FCMGE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv4i16rz | fcmge v18.4h, v27.4h, #0.0 // FCMGE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv2i32rz | fcmge v17.2s, v11.2s, #0.0 // FCMGE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGT16 | fcmgt h4, h5, h0 // FCMGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGT32 | fcmgt s13, s20, s3 // FCMGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv8f16 | fcmgt v24.8h, v24.8h, v28.8h // FCMGT <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv4f32 | fcmgt v19.4s, v20.4s, v13.4s // FCMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv1i16rz | fcmgt h0, h18, #0.0 // FCMGT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv1i64rz | fcmgt d30, d23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv8i16rz | fcmgt v0.8h, v11.8h, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv2i64rz | fcmgt v19.2d, v31.2d, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZZZI_H | fcmla z20.h, z12.h, z4.h[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZZZI_S | fcmla z1.s, z27.s, z6.s[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZPmZZ_S | fcmla z25.s, p3/m, z13.s, z23.s, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZZ_S | fcmge p5.s, p3/z, z12.s, z28.s // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv1i16rz | fcmle h18, h28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv1i64rz | fcmle d18, d16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv8i16rz | fcmle v16.8h, v11.8h, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv4i32rz | fcmle v22.4s, v30.4s, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZZ_S | fcmgt p1.s, p1/z, z24.s, z13.s // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv1i16rz | fcmlt h23, h7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv1i64rz | fcmlt d22, d28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv4i16rz | fcmlt v8.4h, v2.4h, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv2i64rz | fcmlt v7.2d, v16.2d, #0.0 // FCMLT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHrr | fcmp h5, h21 // FCMP <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHri | fcmp h5, #0.0 // FCMP <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSrr | fcmp s7, s0 // FCMP <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSri | fcmp s28, #0.0 // FCMP <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDrr | fcmp d1, d27 // FCMP <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDri | fcmp d16, #0.0 // FCMP <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHrr | fcmpe h22, h21 // FCMPE <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHri | fcmpe h13, #0.0 // FCMPE <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESrr | fcmpe s11, s29 // FCMPE <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESri | fcmpe s15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDrr | fcmpe d27, d22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDri | fcmpe d9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCPY_ZPmI_H | fmov z2.h, p7/m, #0.50000000 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELHrrr | fcsel h26, h2, h11, hs // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELSrrr | fcsel s5, s1, s4, vc // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELDrrr | fcsel d14, d0, d19, eq // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTSHr | fcvt s13, h13 // FCVT <Sd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTDHr | fcvt d10, h6 // FCVT <Dd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTHSr | fcvt h1, s1 // FCVT <Hd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTDSr | fcvt d9, s23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTHDr | fcvt h17, d16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTSDr | fcvt s31, d27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVT_ZPmZ_HtoS | fcvt z0.s, p1/m, z4.h // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_HtoD | fcvt z6.d, p0/m, z17.h // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVT_ZPmZ_StoH | fcvt z7.h, p7/m, z5.s // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_StoD | fcvt z11.d, p2/m, z18.s // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_DtoH | fcvt z26.h, p0/m, z30.d // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_DtoS | fcvt z13.s, p2/m, z3.d // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWHr | fcvtas w23, h3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXHr | fcvtas x14, h29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWSr | fcvtas w0, s13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXSr | fcvtas x23, s15 // FCVTAS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWDr | fcvtas w1, d31 // FCVTAS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXDr | fcvtas x2, d3 // FCVTAS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1f16 | fcvtas h27, h24 // FCVTAS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1i32 | fcvtas s16, s0 // FCVTAS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1i64 | fcvtas d14, d7 // FCVTAS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv4f16 | fcvtas v5.4h, v16.4h // FCVTAS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTASv8f16 | fcvtas v13.8h, v30.8h // FCVTAS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv2f32 | fcvtas v12.2s, v1.2s // FCVTAS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv4f32 | fcvtas v9.4s, v31.4s // FCVTAS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv2f64 | fcvtas v2.2d, v22.2d // FCVTAS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWHr | fcvtau w13, h27 // FCVTAU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXHr | fcvtau x8, h12 // FCVTAU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWSr | fcvtau w20, s10 // FCVTAU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXSr | fcvtau x27, s22 // FCVTAU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWDr | fcvtau w6, d26 // FCVTAU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXDr | fcvtau x16, d13 // FCVTAU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1f16 | fcvtau h6, h29 // FCVTAU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1i32 | fcvtau s23, s7 // FCVTAU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1i64 | fcvtau d1, d26 // FCVTAU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv4f16 | fcvtau v12.4h, v13.4h // FCVTAU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTAUv8f16 | fcvtau v21.8h, v0.8h // FCVTAU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv2f32 | fcvtau v31.2s, v6.2s // FCVTAU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv4f32 | fcvtau v29.4s, v26.4s // FCVTAU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv2f64 | fcvtau v9.2d, v7.2d // FCVTAU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTLv4i16 | fcvtl v30.4s, v4.4h // FCVTL <Vd>.4S, <Vn>.4H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTLv2i32 | fcvtl v28.2d, v13.2s // FCVTL <Vd>.2D, <Vn>.2S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTLv8i16 | fcvtl2 v14.4s, v29.8h // FCVTL2 <Vd>.4S, <Vn>.8H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTLv4i32 | fcvtl2 v0.2d, v9.4s // FCVTL2 <Vd>.2D, <Vn>.4S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWHr | fcvtms w15, h1 // FCVTMS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXHr | fcvtms x5, h2 // FCVTMS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWSr | fcvtms w1, s16 // FCVTMS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXSr | fcvtms x27, s22 // FCVTMS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWDr | fcvtms w18, d21 // FCVTMS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXDr | fcvtms x6, d26 // FCVTMS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1f16 | fcvtms h19, h29 // FCVTMS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1i32 | fcvtms s30, s14 // FCVTMS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1i64 | fcvtms d8, d20 // FCVTMS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv4f16 | fcvtms v27.4h, v7.4h // FCVTMS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTMSv8f16 | fcvtms v26.8h, v11.8h // FCVTMS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv2f32 | fcvtms v13.2s, v2.2s // FCVTMS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv4f32 | fcvtms v18.4s, v21.4s // FCVTMS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv2f64 | fcvtms v15.2d, v16.2d // FCVTMS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWHr | fcvtmu w20, h6 // FCVTMU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXHr | fcvtmu x7, h18 // FCVTMU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWSr | fcvtmu w24, s19 // FCVTMU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXSr | fcvtmu x7, s15 // FCVTMU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWDr | fcvtmu w16, d16 // FCVTMU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXDr | fcvtmu x1, d18 // FCVTMU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1f16 | fcvtmu h20, h13 // FCVTMU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1i32 | fcvtmu s28, s25 // FCVTMU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1i64 | fcvtmu d3, d27 // FCVTMU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv4f16 | fcvtmu v18.4h, v2.4h // FCVTMU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTMUv8f16 | fcvtmu v10.8h, v11.8h // FCVTMU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv2f32 | fcvtmu v27.2s, v14.2s // FCVTMU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv4f32 | fcvtmu v31.4s, v4.4s // FCVTMU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv2f64 | fcvtmu v6.2d, v26.2d // FCVTMU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNv4i16 | fcvtn v4.4h, v22.4s // FCVTN <Vd>.4H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNv2i32 | fcvtn v14.2s, v2.2d // FCVTN <Vd>.2S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNv8i16 | fcvtn2 v0.8h, v30.4s // FCVTN2 <Vd>.8H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNv4i32 | fcvtn2 v21.4s, v13.2d // FCVTN2 <Vd>.4S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWHr | fcvtns w19, h15 // FCVTNS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXHr | fcvtns x20, h0 // FCVTNS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWSr | fcvtns w10, s5 // FCVTNS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXSr | fcvtns x14, s12 // FCVTNS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWDr | fcvtns w30, d2 // FCVTNS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXDr | fcvtns x0, d12 // FCVTNS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1f16 | fcvtns h16, h25 // FCVTNS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1i32 | fcvtns s23, s19 // FCVTNS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1i64 | fcvtns d30, d1 // FCVTNS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv4f16 | fcvtns v28.4h, v19.4h // FCVTNS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTNSv8f16 | fcvtns v19.8h, v19.8h // FCVTNS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv2f32 | fcvtns v20.2s, v4.2s // FCVTNS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv4f32 | fcvtns v28.4s, v29.4s // FCVTNS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv2f64 | fcvtns v21.2d, v31.2d // FCVTNS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWHr | fcvtnu w12, h3 // FCVTNU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXHr | fcvtnu x23, h27 // FCVTNU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWSr | fcvtnu w4, s23 // FCVTNU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXSr | fcvtnu x5, s28 // FCVTNU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWDr | fcvtnu w4, d11 // FCVTNU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXDr | fcvtnu x12, d8 // FCVTNU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1f16 | fcvtnu h24, h22 // FCVTNU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1i32 | fcvtnu s29, s22 // FCVTNU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1i64 | fcvtnu d18, d15 // FCVTNU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv4f16 | fcvtnu v5.4h, v12.4h // FCVTNU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTNUv8f16 | fcvtnu v26.8h, v20.8h // FCVTNU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv2f32 | fcvtnu v15.2s, v1.2s // FCVTNU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv4f32 | fcvtnu v7.4s, v16.4s // FCVTNU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv2f64 | fcvtnu v13.2d, v8.2d // FCVTNU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWHr | fcvtps w27, h14 // FCVTPS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXHr | fcvtps x26, h20 // FCVTPS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWSr | fcvtps w5, s27 // FCVTPS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXSr | fcvtps x29, s6 // FCVTPS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWDr | fcvtps w23, d25 // FCVTPS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXDr | fcvtps x10, d16 // FCVTPS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1f16 | fcvtps h31, h22 // FCVTPS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1i32 | fcvtps s3, s3 // FCVTPS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1i64 | fcvtps d10, d26 // FCVTPS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv4f16 | fcvtps v13.4h, v26.4h // FCVTPS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTPSv8f16 | fcvtps v26.8h, v10.8h // FCVTPS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv2f32 | fcvtps v18.2s, v8.2s // FCVTPS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv4f32 | fcvtps v12.4s, v18.4s // FCVTPS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv2f64 | fcvtps v3.2d, v2.2d // FCVTPS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWHr | fcvtpu w25, h22 // FCVTPU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXHr | fcvtpu x4, h24 // FCVTPU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWSr | fcvtpu w13, s0 // FCVTPU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXSr | fcvtpu x0, s17 // FCVTPU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWDr | fcvtpu w16, d25 // FCVTPU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXDr | fcvtpu x15, d12 // FCVTPU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1f16 | fcvtpu h1, h29 // FCVTPU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1i32 | fcvtpu s21, s30 // FCVTPU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1i64 | fcvtpu d16, d26 // FCVTPU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv4f16 | fcvtpu v2.4h, v25.4h // FCVTPU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTPUv8f16 | fcvtpu v24.8h, v26.8h // FCVTPU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv2f32 | fcvtpu v6.2s, v23.2s // FCVTPU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv4f32 | fcvtpu v10.4s, v6.4s // FCVTPU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv2f64 | fcvtpu v7.2d, v23.2d // FCVTPU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv1i64 | fcvtxn s29, d4 // FCVTXN <Vb><d>, <Va><n> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv2f32 | fcvtxn v25.2s, v15.2d // FCVTXN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv4f32 | fcvtxn2 v21.4s, v6.2d // FCVTXN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWHri | fcvtzs w28, h26, #26 // FCVTZS <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXHri | fcvtzs x22, h17, #58 // FCVTZS <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWSri | fcvtzs w17, s23, #22 // FCVTZS <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXSri | fcvtzs x15, s30, #2 // FCVTZS <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWDri | fcvtzs w13, d17, #17 // FCVTZS <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXDri | fcvtzs x14, d9, #24 // FCVTZS <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWHr | fcvtzs w15, h10 // FCVTZS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXHr | fcvtzs x4, h21 // FCVTZS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWSr | fcvtzs w1, s4 // FCVTZS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXSr | fcvtzs x27, s27 // FCVTZS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWDr | fcvtzs w24, d30 // FCVTZS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXDr | fcvtzs x18, d21 // FCVTZS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSh | fcvtzs h29, h23, #16 // FCVTZS H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSs | fcvtzs s23, s15, #2 // FCVTZS S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSd | fcvtzs d20, d26, #57 // FCVTZS D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4i16_shift | fcvtzs v20.4h, v24.4h, #11 // FCVTZS <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZSv8i16_shift | fcvtzs v18.8h, v10.8h, #7 // FCVTZS <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2i32_shift | fcvtzs v16.2s, v2.2s, #11 // FCVTZS <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4i32_shift | fcvtzs v22.4s, v18.4s, #5 // FCVTZS <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2i64_shift | fcvtzs v14.2d, v30.2d, #54 // FCVTZS <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1f16 | fcvtzs h16, h27 // FCVTZS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1i32 | fcvtzs s4, s5 // FCVTZS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1i64 | fcvtzs d4, d23 // FCVTZS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4f16 | fcvtzs v8.4h, v16.4h // FCVTZS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZSv8f16 | fcvtzs v2.8h, v16.8h // FCVTZS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2f32 | fcvtzs v27.2s, v28.2s // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4f32 | fcvtzs v29.4s, v18.4s // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2f64 | fcvtzs v13.2d, v31.2d // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoH | fcvtzs z1.h, p2/m, z6.h // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoS | fcvtzs z19.s, p4/m, z16.h // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoD | fcvtzs z14.d, p0/m, z6.h // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZS_ZPmZ_StoS | fcvtzs z25.s, p5/m, z23.s // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZS_ZPmZ_StoD | fcvtzs z3.d, p1/m, z31.s // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_DtoS | fcvtzs z28.s, p5/m, z23.d // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_DtoD | fcvtzs z22.d, p6/m, z29.d // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWHri | fcvtzu w12, h19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXHri | fcvtzu x17, h23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWSri | fcvtzu w16, s3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXSri | fcvtzu x27, s15, #8 // FCVTZU <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWDri | fcvtzu w21, d10, #23 // FCVTZU <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXDri | fcvtzu x26, d30, #27 // FCVTZU <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWHr | fcvtzu w26, h30 // FCVTZU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXHr | fcvtzu x9, h11 // FCVTZU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWSr | fcvtzu w20, s16 // FCVTZU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXSr | fcvtzu x7, s21 // FCVTZU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWDr | fcvtzu w25, d30 // FCVTZU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXDr | fcvtzu x13, d8 // FCVTZU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUh | fcvtzu h19, h8, #12 // FCVTZU H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUs | fcvtzu s25, s27, #10 // FCVTZU S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUd | fcvtzu d30, d16, #42 // FCVTZU D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4i16_shift | fcvtzu v19.4h, v26.4h, #9 // FCVTZU <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZUv8i16_shift | fcvtzu v27.8h, v6.8h, #11 // FCVTZU <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2i32_shift | fcvtzu v30.2s, v4.2s, #19 // FCVTZU <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4i32_shift | fcvtzu v31.4s, v6.4s, #22 // FCVTZU <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2i64_shift | fcvtzu v10.2d, v12.2d, #53 // FCVTZU <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1f16 | fcvtzu h25, h30 // FCVTZU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1i32 | fcvtzu s2, s19 // FCVTZU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1i64 | fcvtzu d4, d7 // FCVTZU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4f16 | fcvtzu v3.4h, v2.4h // FCVTZU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZUv8f16 | fcvtzu v30.8h, v25.8h // FCVTZU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2f32 | fcvtzu v25.2s, v25.2s // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4f32 | fcvtzu v21.4s, v2.4s // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2f64 | fcvtzu v23.2d, v15.2d // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoH | fcvtzu z15.h, p0/m, z8.h // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoS | fcvtzu z8.s, p5/m, z18.h // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoD | fcvtzu z11.d, p4/m, z24.h // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZU_ZPmZ_StoS | fcvtzu z13.s, p7/m, z8.s // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZU_ZPmZ_StoD | fcvtzu z20.d, p2/m, z13.s // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_DtoS | fcvtzu z31.s, p3/m, z20.d // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_DtoD | fcvtzu z4.d, p1/m, z25.d // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 7 | 7 | 1.00 | V1UnitV[2], V1UnitV02[2] | FDIVHrr | fdiv h1, h26, h23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.67 | V1UnitV[3], V1UnitV02[3] | FDIVSrr | fdiv s31, s18, s12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
+# CHECK-NEXT: 1 | 15 | 15 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVDrr | fdiv d6, d3, d0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVv4f16 | fdiv v21.4h, v15.4h, v22.4h // FDIV <Vd>.4H, <Vn>.4H, <Vm>.4H \\ ASIMD FP divide, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 13 | 13 | 0.15 | V1UnitV[13], V1UnitV02[13] | FDIVv8f16 | fdiv v31.8h, v12.8h, v15.8h // FDIV <Vd>.8H, <Vn>.8H, <Vm>.8H \\ ASIMD FP divide, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.40 | V1UnitV[5], V1UnitV02[5] | FDIVv2f32 | fdiv v15.2s, v23.2s, v2.2s // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+# CHECK-NEXT: 1 | 10 | 10 | 0.22 | V1UnitV[9], V1UnitV02[9] | FDIVv4f32 | fdiv v7.4s, v27.4s, v22.4s // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
+# CHECK-NEXT: 1 | 15 | 15 | 0.14 | V1UnitV[14], V1UnitV02[14] | FDIVv2f64 | fdiv v31.2d, v25.2d, v8.2d // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.14 V1UnitV02[14]
+# CHECK-NEXT: 1 | 13 | 13 | 0.08 | V1UnitV[12], V1UnitV0[12], V1UnitV01[12], V1UnitV02[12] | FDIV_ZPmZ_H | fdiv z21.h, p7/m, z21.h, z15.h // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 10 | 10 | 0.11 | V1UnitV[9], V1UnitV0[9], V1UnitV01[9], V1UnitV02[9] | FDIV_ZPmZ_S | fdiv z17.s, p4/m, z17.s, z20.s // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
+# CHECK-NEXT: 1 | 15 | 15 | 0.07 | V1UnitV[14], V1UnitV0[14], V1UnitV01[14], V1UnitV02[14] | FDIV_ZPmZ_D | fdiv z13.d, p3/m, z13.d, z28.d // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
+# CHECK-NEXT: 1 | 13 | 13 | 0.08 | V1UnitV[12], V1UnitV0[12], V1UnitV01[12], V1UnitV02[12] | FDIVR_ZPmZ_H | fdivr z29.h, p4/m, z29.h, z1.h // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 10 | 10 | 0.11 | V1UnitV[9], V1UnitV0[9], V1UnitV01[9], V1UnitV02[9] | FDIVR_ZPmZ_S | fdivr z13.s, p0/m, z13.s, z29.s // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
+# CHECK-NEXT: 1 | 15 | 15 | 0.07 | V1UnitV[14], V1UnitV0[14], V1UnitV01[14], V1UnitV02[14] | FDIVR_ZPmZ_D | fdivr z14.d, p3/m, z14.d, z31.d // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FDUP_ZI_S | fmov z19.s, #0.50000000 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FEXPA_ZZ_H | fexpa z6.h, z3.h // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAD_ZPmZZ_S | fmad z9.s, p5/m, z9.s, z7.s // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDHrrr | fmadd h27, h0, h6, h28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDSrrr | fmadd s13, s24, s15, s5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDDrrr | fmadd d19, d4, d2, d17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAX_ZPmI_D | fmax z25.d, p2/m, z25.d, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXHrr | fmax h8, h7, h11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXSrr | fmax s9, s21, s2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXDrr | fmax d4, d26, d26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXv4f32 | fmax v0.4s, v13.4s, v21.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXv4f32 | fmax v12.4s, v27.4s, v11.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAX_ZPmZ_S | fmax z16.s, p5/m, z16.s, z12.s // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAXNM_ZPmI_D | fmaxnm z25.d, p5/m, z25.d, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMHrr | fmaxnm h29, h13, h14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMSrr | fmaxnm s25, s20, s0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMDrr | fmaxnm d29, d25, d16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMv4f32 | fmaxnm v6.4s, v3.4s, v3.4s // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMv2f64 | fmaxnm v9.2d, v15.2d, v11.2d // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAXNM_ZPmZ_S | fmaxnm z6.s, p5/m, z6.s, z17.s // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv2i16p | fmaxnmp h25, v19.2h // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv2i64p | fmaxnmp d17, v29.2d // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv4f32 | fmaxnmp v31.4s, v4.4s, v2.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv4f32 | fmaxnmp v23.4s, v15.4s, v1.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXNMVv4i16v | fmaxnmv h0, v13.4h // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMAXNMVv8i16v | fmaxnmv h12, v11.8h // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXNMVv4i32v | fmaxnmv s28, v31.4s // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMAXNMV_VPZ_H | fmaxnmv h9, p3, z2.h // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMAXNMV_VPZ_S | fmaxnmv s26, p6, z0.s // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMAXNMV_VPZ_D | fmaxnmv d7, p1, z29.d // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2i16p | fmaxp h15, v25.2h // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2i32p | fmaxp s6, v2.2s // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2f32 | fmaxp v21.2s, v17.2s, v13.2s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv4f32 | fmaxp v10.4s, v5.4s, v25.4s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXVv4i16v | fmaxv h23, v4.4h // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMAXVv8i16v | fmaxv h25, v15.8h // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXVv4i32v | fmaxv s23, v2.4s // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMAXV_VPZ_H | fmaxv h12, p0, z22.h // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMAXV_VPZ_S | fmaxv s24, p5, z12.s // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMAXV_VPZ_D | fmaxv d1, p6, z25.d // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMIN_ZPmI_D | fmin z24.d, p4/m, z24.d, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINHrr | fmin h4, h13, h17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINSrr | fmin s1, s14, s22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINDrr | fmin d18, d19, d22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINv4f32 | fmin v6.4s, v25.4s, v27.4s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINv2f32 | fmin v12.2s, v30.2s, v25.2s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMIN_ZPmZ_H | fmin z11.h, p3/m, z11.h, z16.h // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMINNM_ZPmI_H | fminnm z19.h, p4/m, z19.h, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMHrr | fminnm h29, h23, h17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMSrr | fminnm s24, s14, s30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMDrr | fminnm d0, d26, d8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMv2f32 | fminnm v16.2s, v23.2s, v27.2s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMv4f32 | fminnm v23.4s, v19.4s, v22.4s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMINNM_ZPmZ_S | fminnm z24.s, p3/m, z24.s, z13.s // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2i16p | fminnmp h20, v14.2h // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2i64p | fminnmp d15, v8.2d // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2f64 | fminnmp v27.2d, v27.2d, v16.2d // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv4f32 | fminnmp v2.4s, v14.4s, v14.4s // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINNMVv4i16v | fminnmv h19, v25.4h // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMINNMVv8i16v | fminnmv h23, v17.8h // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINNMVv4i32v | fminnmv s29, v17.4s // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMINNMV_VPZ_H | fminnmv h24, p3, z1.h // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMINNMV_VPZ_S | fminnmv s30, p3, z9.s // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMINNMV_VPZ_D | fminnmv d18, p5, z8.d // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2i16p | fminp h7, v10.2h // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2i32p | fminp s17, v7.2s // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv4f32 | fminp v25.4s, v2.4s, v15.4s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2f32 | fminp v14.2s, v28.2s, v15.2s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINVv4i16v | fminv h3, v30.4h // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMINVv8i16v | fminv h29, v12.8h // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINVv4i32v | fminv s16, v19.4s // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMINV_VPZ_H | fminv h15, p2, z25.h // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
+# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMINV_VPZ_S | fminv s4, p0, z6.s // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMINV_VPZ_D | fminv d20, p1, z5.d // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i16_indexed | fmla h23, h24, v15.h[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i32_indexed | fmla s9, s20, v28.s[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i64_indexed | fmla d12, d20, v7.d[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv8i16_indexed | fmla v29.8h, v15.8h, v10.h[4] // FMLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2i32_indexed | fmla v2.2s, v16.2s, v28.s[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv4i32_indexed | fmla v14.4s, v14.4s, v5.s[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2i64_indexed | fmla v10.2d, v14.2d, v21.d[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_H | fmla z2.h, z4.h, z7.h[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_S | fmla z22.s, z15.s, z1.s[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_D | fmla z1.d, z30.d, z11.d[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv4f32 | fmla v1.4s, v24.4s, v12.4s // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2f64 | fmla v30.2d, v16.2d, v6.2d // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZPmZZ_S | fmla z6.s, p1/m, z24.s, z24.s // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i16_indexed | fmls h8, h14, v7.h[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i32_indexed | fmls s20, s17, v5.s[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i64_indexed | fmls d11, d24, v29.d[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv8i16_indexed | fmls v30.8h, v18.8h, v4.h[6] // FMLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2i32_indexed | fmls v10.2s, v27.2s, v0.s[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv4i32_indexed | fmls v27.4s, v7.4s, v24.s[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2i64_indexed | fmls v10.2d, v22.2d, v29.d[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_H | fmls z3.h, z31.h, z0.h[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_S | fmls z30.s, z8.s, z0.s[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_D | fmls z10.d, z20.d, z0.d[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2f32 | fmls v6.2s, v3.2s, v12.2s // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv8f16 | fmls v6.8h, v15.8h, v23.8h // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZPmZZ_S | fmls z26.s, p5/m, z28.s, z26.s // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHWr | fmov w15, h31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHXr | fmov x21, h14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVWHr | fmov h6, w5 // FMOV <Hd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVWSr | fmov s22, w0 // FMOV <Sd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVSWr | fmov w23, s30 // FMOV <Wd>, <Sn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVXHr | fmov h16, x27 // FMOV <Hd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVXDr | fmov d22, x12 // FMOV <Dd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | FMOVXDHighr | fmov v7.d[1], x8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXr | fmov x26, d29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXHighr | fmov x4, v26.d[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCPY_ZPmI_S | fmov z2.s, p0/m, #0.50000000 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FDUP_ZI_S | fmov z14.s, #0.50000000 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVHr | fmov h18, h28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVSr | fmov s13, s23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVDr | fmov d27, d17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVHi | fmov h29, #0.50000000 // FMOV <Hd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVSi | fmov s22, #0.50000000 // FMOV <Sd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVDi | fmov d18, #0.50000000 // FMOV <Dd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f32_ns | fmov v12.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f32_ns | fmov v10.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f64_ns | fmov v0.2d, #0.50000000 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_D | mov z2.d, p2/m, #0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_S | mov z5.s, #0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMSB_ZPmZZ_S | fmsb z25.s, p5/m, z25.s, z29.s // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBHrrr | fmsub h25, h28, h12, h24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBSrrr | fmsub s31, s0, s23, s24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBDrrr | fmsub d12, d10, d20, d16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i16_indexed | fmul h18, h4, v7.h[3] // FMUL <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i32_indexed | fmul s17, s23, v30.s[2] // FMUL S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i64_indexed | fmul d27, d8, v10.d[1] // FMUL D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv4i16_indexed | fmul v10.4h, v2.4h, v7.h[5] // FMUL <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2i32_indexed | fmul v5.2s, v12.2s, v9.s[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv4i32_indexed | fmul v15.4s, v30.4s, v2.s[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2i64_indexed | fmul v11.2d, v31.2d, v24.d[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZPmI_H | fmul z17.h, p5/m, z17.h, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_H | fmul z27.h, z30.h, z0.h[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_S | fmul z6.s, z16.s, z1.s[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_D | fmul z4.d, z30.d, z2.d[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULHrr | fmul h28, h14, h3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULSrr | fmul s28, s16, s24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULDrr | fmul d19, d19, d0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2f64 | fmul v0.2d, v14.2d, v20.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2f64 | fmul v9.2d, v29.2d, v7.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZPmZ_D | fmul z22.d, p1/m, z22.d, z3.d // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZ_S | fmul z19.s, z14.s, z26.s // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i16_indexed | fmulx h18, h17, v7.h[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i32_indexed | fmulx s23, s3, v3.s[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i64_indexed | fmulx d3, d13, v30.d[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv4i16_indexed | fmulx v28.4h, v25.4h, v15.h[1] // FMULX <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2i32_indexed | fmulx v3.2s, v22.2s, v23.s[3] // FMULX <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv4i32_indexed | fmulx v5.4s, v28.4s, v15.s[3] // FMULX <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2i64_indexed | fmulx v22.2d, v18.2d, v25.d[1] // FMULX <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULX16 | fmulx h20, h25, h0 // FMULX <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULX64 | fmulx d18, d19, d22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2f64 | fmulx v22.2d, v18.2d, v4.2d // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2f32 | fmulx v16.2s, v4.2s, v27.2s // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMULX_ZPmZ_H | fmulx z7.h, p5/m, z7.h, z21.h // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGHr | fneg h2, h9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGSr | fneg s11, s19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGDr | fneg d5, d16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGv2f64 | fneg v26.2d, v2.2d // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGv2f32 | fneg v14.2s, v24.2s // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNEG_ZPmZ_S | fneg z16.s, p0/m, z25.s // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMAD_ZPmZZ_H | fnmad z6.h, p2/m, z14.h, z21.h // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDHrrr | fnmadd h3, h18, h31, h24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDSrrr | fnmadd s8, s18, s2, s14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDDrrr | fnmadd d19, d29, d28, d30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMLA_ZPmZZ_D | fnmla z15.d, p0/m, z8.d, z29.d // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMLS_ZPmZZ_D | fnmls z13.d, p0/m, z8.d, z12.d // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMSB_ZPmZZ_D | fnmsb z30.d, p7/m, z8.d, z9.d // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBHrrr | fnmsub h3, h29, h24, h17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBSrrr | fnmsub s29, s26, s17, s4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBDrrr | fnmsub d7, d13, d13, d4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULHrr | fnmul h3, h15, h7 // FNMUL <Hd>, <Hn>, <Hm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULSrr | fnmul s16, s11, s2 // FNMUL <Sd>, <Sn>, <Sm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULDrr | fnmul d12, d22, d14 // FNMUL <Dd>, <Dn>, <Dm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1f16 | frecpe h20, h8 // FRECPE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1i32 | frecpe s27, s7 // FRECPE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1i64 | frecpe d2, d1 // FRECPE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv4f16 | frecpe v28.4h, v27.4h // FRECPE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRECPEv8f16 | frecpe v9.8h, v6.8h // FRECPE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv2f32 | frecpe v25.2s, v28.2s // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv4f32 | frecpe v21.4s, v18.4s // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv2f64 | frecpe v10.2d, v26.2d // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_H | frecpe z14.h, z0.h // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_S | frecpe z5.s, z16.s // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_D | frecpe z27.d, z11.d // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPS16 | frecps h29, h19, h8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPS64 | frecps d25, d17, d12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPSv8f16 | frecps v12.8h, v25.8h, v4.8h // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPSv2f64 | frecps v7.2d, v29.2d, v18.2d // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV01 | FRECPS_ZZZ_S | frecps z11.s, z31.s, z1.s // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPXv1f16 | frecpx h18, h11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPXv1i32 | frecpx s13, s30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPX_ZPmZ_S | frecpx z15.s, p4/m, z12.s // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_H | frintn z30.h, p3/m, z31.h // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_S | frintn z17.s, p4/m, z23.s // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_D | frintn z28.d, p1/m, z25.d // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_H | frinta z10.h, p6/m, z17.h // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_S | frinta z7.s, p4/m, z27.s // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_D | frinta z17.d, p4/m, z17.d // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_H | frintm z26.h, p7/m, z0.h // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_S | frintm z6.s, p0/m, z28.s // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_D | frintm z29.d, p4/m, z3.d // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_H | frintp z20.h, p4/m, z12.h // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_S | frintp z3.s, p7/m, z18.s // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_D | frintp z28.d, p7/m, z4.d // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_H | frintz z27.h, p2/m, z12.h // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_S | frintz z12.s, p6/m, z3.s // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_D | frintz z12.d, p2/m, z31.d // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_H | frinti z16.h, p4/m, z9.h // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_S | frinti z18.s, p6/m, z27.s // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_D | frinti z26.d, p2/m, z12.d // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_H | frintx z17.h, p0/m, z9.h // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_S | frintx z27.s, p7/m, z16.s // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_D | frintx z21.d, p4/m, z23.d // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAHr | frinta h22, h10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTASr | frinta s15, s7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTADr | frinta d30, d10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv4f16 | frinta v24.4h, v10.4h // FRINTA <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTAv8f16 | frinta v5.8h, v3.8h // FRINTA <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAv2f32 | frinta v23.2s, v22.2s // FRINTA <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv4f32 | frinta v28.4s, v28.4s // FRINTA <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAv2f64 | frinta v3.2d, v13.2d // FRINTA <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIHr | frinti h31, h14 // FRINTI <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTISr | frinti s23, s9 // FRINTI <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIDr | frinti d8, d12 // FRINTI <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv4f16 | frinti v6.4h, v10.4h // FRINTI <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTIv8f16 | frinti v22.8h, v7.8h // FRINTI <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIv2f32 | frinti v9.2s, v25.2s // FRINTI <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv4f32 | frinti v23.4s, v7.4s // FRINTI <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIv2f64 | frinti v28.2d, v5.2d // FRINTI <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMHr | frintm h0, h21 // FRINTM <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMSr | frintm s22, s10 // FRINTM <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMDr | frintm d5, d30 // FRINTM <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv4f16 | frintm v3.4h, v8.4h // FRINTM <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTMv8f16 | frintm v19.8h, v26.8h // FRINTM <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMv2f32 | frintm v15.2s, v8.2s // FRINTM <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv4f32 | frintm v20.4s, v26.4s // FRINTM <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMv2f64 | frintm v20.2d, v11.2d // FRINTM <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNHr | frintn h12, h3 // FRINTN <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNSr | frintn s27, s14 // FRINTN <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNDr | frintn d30, d17 // FRINTN <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv4f16 | frintn v27.4h, v4.4h // FRINTN <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTNv8f16 | frintn v17.8h, v19.8h // FRINTN <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNv2f32 | frintn v23.2s, v23.2s // FRINTN <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv4f32 | frintn v2.4s, v4.4s // FRINTN <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNv2f64 | frintn v24.2d, v12.2d // FRINTN <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPHr | frintp h17, h31 // FRINTP <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPSr | frintp s14, s10 // FRINTP <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPDr | frintp d25, d13 // FRINTP <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv4f16 | frintp v22.4h, v25.4h // FRINTP <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTPv8f16 | frintp v18.8h, v11.8h // FRINTP <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPv2f32 | frintp v31.2s, v5.2s // FRINTP <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv4f32 | frintp v0.4s, v24.4s // FRINTP <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPv2f64 | frintp v1.2d, v3.2d // FRINTP <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXHr | frintx h4, h5 // FRINTX <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXSr | frintx s10, s28 // FRINTX <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXDr | frintx d17, d19 // FRINTX <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv4f16 | frintx v24.4h, v25.4h // FRINTX <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTXv8f16 | frintx v1.8h, v27.8h // FRINTX <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXv2f32 | frintx v2.2s, v14.2s // FRINTX <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv4f32 | frintx v27.4s, v31.4s // FRINTX <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXv2f64 | frintx v24.2d, v20.2d // FRINTX <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZHr | frintz h10, h29 // FRINTZ <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZSr | frintz s11, s23 // FRINTZ <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZDr | frintz d6, d11 // FRINTZ <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv4f16 | frintz v13.4h, v5.4h // FRINTZ <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTZv8f16 | frintz v20.8h, v21.8h // FRINTZ <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZv2f32 | frintz v15.2s, v19.2s // FRINTZ <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv4f32 | frintz v11.4s, v18.4s // FRINTZ <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZv2f64 | frintz v12.2d, v22.2d // FRINTZ <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1f16 | frsqrte h23, h26 // FRSQRTE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1i32 | frsqrte s23, s5 // FRSQRTE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1i64 | frsqrte d3, d11 // FRSQRTE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv4f16 | frsqrte v16.4h, v15.4h // FRSQRTE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRSQRTEv8f16 | frsqrte v14.8h, v0.8h // FRSQRTE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv2f32 | frsqrte v6.2s, v8.2s // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv4f32 | frsqrte v30.4s, v21.4s // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv2f64 | frsqrte v15.2d, v14.2d // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_H | frsqrte z6.h, z30.h // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_S | frsqrte z27.s, z15.s // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_D | frsqrte z6.d, z17.d // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTS16 | frsqrts h28, h26, h1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTS32 | frsqrts s28, s1, s11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTSv4f16 | frsqrts v8.4h, v9.4h, v30.4h // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTSv4f32 | frsqrts v20.4s, v26.4s, v27.4s // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV01 | FRSQRTS_ZZZ_H | frsqrts z10.h, z25.h, z22.h // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FSCALE_ZPmZ_H | fscale z2.h, p0/m, z2.h, z21.h // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 7 | 7 | 1.00 | V1UnitV[2], V1UnitV02[2] | FSQRTHr | fsqrt h13, h24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 9 | 9 | 1.00 | V1UnitV[2], V1UnitV02[2] | FSQRTSr | fsqrt s20, s15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 16 | 16 | 0.25 | V1UnitV[8], V1UnitV02[8] | FSQRTDr | fsqrt d25, d21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTv4f16 | fsqrt v24.4h, v14.4h // FSQRT <Vd>.4H, <Vn>.4H \\ ASIMD FP square root, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 13 | 13 | 0.15 | V1UnitV[13], V1UnitV02[13] | FSQRTv8f16 | fsqrt v12.8h, v3.8h // FSQRT <Vd>.8H, <Vn>.8H \\ ASIMD FP square root, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.40 | V1UnitV[5], V1UnitV02[5] | FSQRTv2f32 | fsqrt v30.2s, v20.2s // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+# CHECK-NEXT: 1 | 10 | 10 | 0.22 | V1UnitV[9], V1UnitV02[9] | FSQRTv4f32 | fsqrt v2.4s, v24.4s // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
+# CHECK-NEXT: 1 | 16 | 16 | 0.13 | V1UnitV[15], V1UnitV02[15] | FSQRTv2f64 | fsqrt v28.2d, v25.2d // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.13 V1UnitV02[15]
+# CHECK-NEXT: 1 | 13 | 13 | 0.08 | V1UnitV[12], V1UnitV0[12], V1UnitV01[12], V1UnitV02[12] | FSQRT_ZPmZ_H | fsqrt z13.h, p3/m, z11.h // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 1 13 13 0.08 V1UnitV0[12]
+# CHECK-NEXT: 1 | 10 | 10 | 0.11 | V1UnitV[9], V1UnitV0[9], V1UnitV01[9], V1UnitV02[9] | FSQRT_ZPmZ_S | fsqrt z2.s, p7/m, z0.s // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 1 10 10 0.11 V1UnitV0[9]
+# CHECK-NEXT: 1 | 16 | 16 | 0.07 | V1UnitV[14], V1UnitV0[14], V1UnitV01[14], V1UnitV02[14] | FSQRT_ZPmZ_D | fsqrt z17.d, p6/m, z17.d // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 1 16 16 0.07 V1UnitV0[14]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZPmI_D | fsub z12.d, p6/m, z12.d, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBHrr | fsub h20, h11, h18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBSrr | fsub s15, s4, s24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBDrr | fsub d25, d26, d4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBv8f16 | fsub v13.8h, v15.8h, v17.8h // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBv2f32 | fsub v1.2s, v31.2s, v27.2s // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZPmZ_S | fsub z24.s, p4/m, z24.s, z10.s // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZZZ_H | fsub z19.h, z8.h, z29.h // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUBR_ZPmI_H | fsubr z22.h, p7/m, z22.h, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUBR_ZPmZ_S | fsubr z13.s, p2/m, z13.s, z4.s // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTMAD_ZZI_D | ftmad z19.d, z19.d, z6.d, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTSMUL_ZZZ_S | ftsmul z21.s, z0.s, z10.s // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTSSEL_ZZZ_D | ftssel z5.d, z0.d, z15.d // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | hint #9 // HINT #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HLT | hlt #0x7a67 // HLT #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HVC | hvc #0xecb9 // HVC #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | ic ialluis // IC <ic_op> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | ic ivau, x6 // IC <ic_op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCB_XPiI | incb x18 // INCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCB_XPiI | incb x17, vl3 // INCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCB_XPiI | incb x17, mul3, mul #7 // INCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCD_XPiI | incd x19 // INCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCD_XPiI | incd x17, vl3 // INCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCD_XPiI | incd x11, vl64, mul #7 // INCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCH_XPiI | inch x24 // INCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCH_XPiI | inch x23 // INCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCH_XPiI | inch x22, vl1, mul #8 // INCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x29 // INCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x2, vl64 // INCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x2, vl8 // INCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCD_ZPiI | incd z24.d // INCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCD_ZPiI | incd z23.d, vl8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCD_ZPiI | incd z20.d, vl2, mul #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCH_ZPiI | inch z29.h // INCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCH_ZPiI | inch z28.h, vl16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCH_ZPiI | inch z29.h, vl16, mul #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCW_ZPiI | incw z17.s // INCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCW_ZPiI | incw z31.s, mul3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCW_ZPiI | incw z12.s, vl4, mul #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCP_XP_H | incp x7, p0.h // INCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | INCP_ZP_D | incp z2.d, p6.d // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_IR_B | index z8.b, #15, w14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_IR_H | index z14.h, #11, w10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_IR_S | index z17.s, #14, w21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_IR_D | index z5.d, #11, x15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_B | index z16.b, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_H | index z13.h, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_S | index z20.s, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_II_D | index z13.d, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RI_B | index z28.b, w27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RI_H | index z13.h, w28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RI_S | index z22.s, w7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_RI_D | index z0.d, x25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RR_B | index z6.b, w24, w8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RR_H | index z20.h, w4, w7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RR_S | index z10.s, w2, w19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_RR_D | index z2.d, x23, x7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi8lane | mov v15.b[7], v6.b[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi16lane | mov v17.h[1], v3.h[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi32lane | mov v4.s[1], v7.s[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi64lane | mov v22.d[1], v25.d[1] // INS <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi8gpr | mov v14.b[3], w12 // INS <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi16gpr | mov v25.h[2], w14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi32gpr | mov v14.s[1], w29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi64gpr | mov v19.d[1], x27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INSR_ZV_D | insr z4.d, d0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INSR_ZR_D | insr z4.d, x14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb // ISB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb // ISB <option> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb #1 // ISB #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTA_VPZ_B | lasta b3, p1, z3.b // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTA_RPZ_B | lasta w16, p0, z10.b // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTB_VPZ_D | lastb d3, p1, z17.d // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTB_RPZ_D | lastb x4, p3, z31.d // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev8b | ld1 { v23.8b }, [x11] // LD1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8b_POST | ld1 { v25.8b }, [x30], #8 // LD1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8b_POST | ld1 { v14.8b }, [x1], x26 // LD1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev16b | ld1 { v12.16b }, [x19] // LD1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev16b_POST | ld1 { v24.16b }, [x28], #16 // LD1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev16b_POST | ld1 { v21.16b }, [x25], x28 // LD1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev4h | ld1 { v8.4h }, [x30] // LD1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4h_POST | ld1 { v4.4h }, [x10], #8 // LD1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4h_POST | ld1 { v17.4h }, [x12], x16 // LD1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev8h | ld1 { v24.8h }, [x27] // LD1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8h_POST | ld1 { v21.8h }, [x24], #16 // LD1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8h_POST | ld1 { v9.8h }, [x9], x27 // LD1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev2s | ld1 { v4.2s }, [x2] // LD1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2s_POST | ld1 { v19.2s }, [x27], #8 // LD1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2s_POST | ld1 { v25.2s }, [x13], x19 // LD1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev4s | ld1 { v3.4s }, [x4] // LD1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4s_POST | ld1 { v24.4s }, [x20], #16 // LD1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev4s_POST | ld1 { v29.4s }, [x25], x23 // LD1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev1d | ld1 { v24.1d }, [x9] // LD1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev1d_POST | ld1 { v23.1d }, [x3], #8 // LD1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev1d_POST | ld1 { v19.1d }, [x10], x19 // LD1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev2d | ld1 { v3.2d }, [x28] // LD1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2d_POST | ld1 { v8.2d }, [x16], #16 // LD1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev2d_POST | ld1 { v5.2d }, [x1], x29 // LD1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, Q-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov8b | ld1 { v24.8b, v25.8b }, [x6] // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8b_POST | ld1 { v17.8b, v18.8b }, [x18], #16 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8b_POST | ld1 { v18.8b, v19.8b }, [x6], x11 // LD1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov16b | ld1 { v0.16b, v1.16b }, [x14] // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov16b_POST | ld1 { v20.16b, v21.16b }, [x2], #32 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov16b_POST | ld1 { v5.16b, v6.16b }, [x17], x25 // LD1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov4h | ld1 { v25.4h, v26.4h }, [x3] // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4h_POST | ld1 { v10.4h, v11.4h }, [x14], #16 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4h_POST | ld1 { v0.4h, v1.4h }, [x24], x15 // LD1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov8h | ld1 { v1.8h, v2.8h }, [x27] // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8h_POST | ld1 { v22.8h, v23.8h }, [x13], #32 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov8h_POST | ld1 { v9.8h, v10.8h }, [x4], x13 // LD1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov2s | ld1 { v6.2s, v7.2s }, [x29] // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2s_POST | ld1 { v23.2s, v24.2s }, [x10], #16 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2s_POST | ld1 { v26.2s, v27.2s }, [x21], x29 // LD1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov4s | ld1 { v11.4s, v12.4s }, [x30] // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4s_POST | ld1 { v23.4s, v24.4s }, [x14], #32 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov4s_POST | ld1 { v12.4s, v13.4s }, [x27], x22 // LD1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov1d | ld1 { v27.1d, v28.1d }, [x7] // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov1d_POST | ld1 { v13.1d, v14.1d }, [x29], #16 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov1d_POST | ld1 { v1.1d, v2.1d }, [x7], x20 // LD1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Twov2d | ld1 { v13.2d, v14.2d }, [x13] // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2d_POST | ld1 { v13.2d, v14.2d }, [x10], #32 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Twov2d_POST | ld1 { v20.2d, v21.2d }, [x29], x28 // LD1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 2 reg, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev8b | ld1 { v7.8b, v8.8b, v9.8b }, [x12] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8b_POST | ld1 { v13.8b, v14.8b, v15.8b }, [x10], #24 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8b_POST | ld1 { v28.8b, v29.8b, v30.8b }, [x2], x21 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev16b | ld1 { v19.16b, v20.16b, v21.16b }, [x10] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev16b_POST | ld1 { v8.16b, v9.16b, v10.16b }, [x29], #48 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev16b_POST | ld1 { v14.16b, v15.16b, v16.16b }, [x5], x17 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev4h | ld1 { v10.4h, v11.4h, v12.4h }, [x28] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4h_POST | ld1 { v22.4h, v23.4h, v24.4h }, [x6], #24 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4h_POST | ld1 { v11.4h, v12.4h, v13.4h }, [x13], x23 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev8h | ld1 { v21.8h, v22.8h, v23.8h }, [x22] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8h_POST | ld1 { v26.8h, v27.8h, v28.8h }, [x2], #48 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev8h_POST | ld1 { v6.8h, v7.8h, v8.8h }, [x22], x6 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev2s | ld1 { v16.2s, v17.2s, v18.2s }, [x27] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2s_POST | ld1 { v3.2s, v4.2s, v5.2s }, [x30], #24 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2s_POST | ld1 { v14.2s, v15.2s, v16.2s }, [x11], x28 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev4s | ld1 { v0.4s, v1.4s, v2.4s }, [x24] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4s_POST | ld1 { v17.4s, v18.4s, v19.4s }, [x28], #48 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev4s_POST | ld1 { v5.4s, v6.4s, v7.4s }, [x20], x13 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev1d | ld1 { v14.1d, v15.1d, v16.1d }, [x3] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev1d_POST | ld1 { v21.1d, v22.1d, v23.1d }, [x24], #24 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev1d_POST | ld1 { v25.1d, v26.1d, v27.1d }, [x18], x14 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, D-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitL[3] | LD1Threev2d | ld1 { v12.2d, v13.2d, v14.2d }, [x15] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 1 6 6 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2d_POST | ld1 { v13.2d, v14.2d, v15.2d }, [x4], #48 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[3] | LD1Threev2d_POST | ld1 { v15.2d, v16.2d, v17.2d }, [x10], x6 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 3 reg, Q-form \\ 2 6 6 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv8b | ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x13] // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv8b_POST | ld1 { v8.8b, v9.8b, v10.8b, v11.8b }, [x30], #32 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv8b_POST | ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x20], x3 // LD1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv16b | ld1 { v13.16b, v14.16b, v15.16b, v16.16b }, [x9] // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv16b_POST | ld1 { v3.16b, v4.16b, v5.16b, v6.16b }, [x17], #64 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv16b_POST | ld1 { v10.16b, v11.16b, v12.16b, v13.16b }, [x19], x29 // LD1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv4h | ld1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x15] // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv4h_POST | ld1 { v4.4h, v5.4h, v6.4h, v7.4h }, [x12], #32 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv4h_POST | ld1 { v24.4h, v25.4h, v26.4h, v27.4h }, [x25], x0 // LD1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv8h | ld1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x21] // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv8h_POST | ld1 { v12.8h, v13.8h, v14.8h, v15.8h }, [x21], #64 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv8h_POST | ld1 { v14.8h, v15.8h, v16.8h, v17.8h }, [x12], x23 // LD1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv2s | ld1 { v21.2s, v22.2s, v23.2s, v24.2s }, [x21] // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv2s_POST | ld1 { v27.2s, v28.2s, v29.2s, v30.2s }, [x11], #32 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv2s_POST | ld1 { v24.2s, v25.2s, v26.2s, v27.2s }, [x1], x22 // LD1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv4s | ld1 { v15.4s, v16.4s, v17.4s, v18.4s }, [x28] // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv4s_POST | ld1 { v14.4s, v15.4s, v16.4s, v17.4s }, [x8], #64 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv4s_POST | ld1 { v11.4s, v12.4s, v13.4s, v14.4s }, [x2], x28 // LD1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LD1Fourv1d | ld1 { v22.1d, v23.1d, v24.1d, v25.1d }, [x4] // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv1d_POST | ld1 { v3.1d, v4.1d, v5.1d, v6.1d }, [x23], #32 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LD1Fourv1d_POST | ld1 { v22.1d, v23.1d, v24.1d, v25.1d }, [x9], x22 // LD1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, D-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv2d | ld1 { v18.2d, v19.2d, v20.2d, v21.2d }, [x6] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv2d_POST | ld1 { v3.2d, v4.2d, v5.2d, v6.2d }, [x3], #64 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv2d_POST | ld1 { v6.2d, v7.2d, v8.2d, v9.2d }, [x17], x18 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i8 | ld1 { v18.b }[3], [x23] // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i16 | ld1 { v18.h }[3], [x1] // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i32 | ld1 { v8.s }[0], [x24] // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i64 | ld1 { v11.d }[0], [x13] // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i8_POST | ld1 { v23.b }[1], [x13], #1 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i8_POST | ld1 { v10.b }[9], [x25], x14 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i16_POST | ld1 { v6.h }[2], [x26], #2 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i16_POST | ld1 { v30.h }[6], [x27], x3 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i32_POST | ld1 { v5.s }[1], [x10], #4 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i32_POST | ld1 { v13.s }[3], [x6], x24 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i64_POST | ld1 { v26.d }[1], [x28], #8 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i64_POST | ld1 { v1.d }[1], [x20], x30 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_IMM | ld1b { z20.b }, p1/z, [x25] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_IMM | ld1b { z10.b }, p1/z, [x16, #-1, mul vl] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H_IMM | ld1b { z31.h }, p1/z, [x4] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H_IMM | ld1b { z5.h }, p5/z, [x8, #6, mul vl] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S_IMM | ld1b { z1.s }, p3/z, [x12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S_IMM | ld1b { z24.s }, p2/z, [x28, #1, mul vl] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D_IMM | ld1b { z25.d }, p5/z, [x2] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D_IMM | ld1b { z0.d }, p6/z, [x22, #5, mul vl] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B | ld1b { z7.b }, p0/z, [x24, x11] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H | ld1b { z26.h }, p5/z, [x5, x21] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S | ld1b { z22.s }, p3/z, [x16, x12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D | ld1b { z7.d }, p5/z, [x18, x12] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_D_UXTW | ld1b { z2.d }, p0/z, [x15, z18.d, uxtw] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_S_SXTW | ld1b { z20.s }, p6/z, [x2, z0.s, sxtw] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_D | ld1b { z15.d }, p4/z, [x23, z9.d] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1B_S_IMM | ld1b { z8.s }, p4/z, [z25.s, #22] // LD1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_D_IMM | ld1b { z13.d }, p2/z, [z3.d, #30] // LD1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1D_SXTW_SCALED | ld1d { z21.d }, p1/z, [x24, z31.d, sxtw #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D_SXTW | ld1d { z7.d }, p0/z, [x13, z15.d, sxtw] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1D_SCALED | ld1d { z14.d }, p1/z, [x26, z27.d, lsl #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D | ld1d { z30.d }, p7/z, [x14, z16.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D_IMM | ld1d { z22.d }, p1/z, [z15.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D_IMM | ld1d { z8.d }, p4/z, [z12.d, #200] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_IMM | ld1h { z3.h }, p2/z, [x21] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_IMM | ld1h { z15.h }, p0/z, [x25, #-3, mul vl] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_S_IMM | ld1h { z9.s }, p1/z, [x17] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_S_IMM | ld1h { z1.s }, p3/z, [x14, #5, mul vl] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_D_IMM | ld1h { z10.d }, p3/z, [x9] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_D_IMM | ld1h { z2.d }, p7/z, [x1, #4, mul vl] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H | ld1h { z26.h }, p5/z, [x10, x19, lsl #1] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_S | ld1h { z29.s }, p7/z, [x23, x11, lsl #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_D | ld1h { z2.d }, p5/z, [x30, x9, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_S_SXTW_SCALED | ld1h { z14.s }, p7/z, [x14, z28.s, sxtw #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_D_SXTW_SCALED | ld1h { z28.d }, p7/z, [x8, z9.d, sxtw #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D_UXTW | ld1h { z16.d }, p5/z, [x7, z9.d, uxtw] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_S_UXTW | ld1h { z27.s }, p4/z, [x4, z7.s, uxtw] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_D_SCALED | ld1h { z6.d }, p7/z, [x30, z26.d, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D | ld1h { z11.d }, p2/z, [x20, z25.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_S_IMM | ld1h { z6.s }, p7/z, [z31.s] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_S_IMM | ld1h { z1.s }, p3/z, [z12.s, #8] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D_IMM | ld1h { z7.d }, p7/z, [z9.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D_IMM | ld1h { z13.d }, p3/z, [z5.d, #8] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv8b | ld1r { v8.8b }, [x23] // LD1R { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8b_POST | ld1r { v4.8b }, [x25], #1 // LD1R { <Vt>.8B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8b_POST | ld1r { v14.8b }, [x24], x14 // LD1R { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv16b | ld1r { v8.16b }, [x24] // LD1R { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv16b_POST | ld1r { v21.16b }, [x30], #1 // LD1R { <Vt>.16B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv16b_POST | ld1r { v1.16b }, [x3], x9 // LD1R { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv4h | ld1r { v28.4h }, [x9] // LD1R { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4h_POST | ld1r { v10.4h }, [x27], #2 // LD1R { <Vt>.4H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4h_POST | ld1r { v12.4h }, [x8], x20 // LD1R { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv8h | ld1r { v3.8h }, [x16] // LD1R { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8h_POST | ld1r { v27.8h }, [x18], #2 // LD1R { <Vt>.8H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8h_POST | ld1r { v20.8h }, [x20], x4 // LD1R { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv2s | ld1r { v10.2s }, [x20] // LD1R { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2s_POST | ld1r { v28.2s }, [x8], #4 // LD1R { <Vt>.2S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2s_POST | ld1r { v4.2s }, [x0], x12 // LD1R { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv4s | ld1r { v11.4s }, [x3] // LD1R { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4s_POST | ld1r { v18.4s }, [x3], #4 // LD1R { <Vt>.4S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4s_POST | ld1r { v2.4s }, [x4], x1 // LD1R { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv1d | ld1r { v3.1d }, [x15] // LD1R { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv1d_POST | ld1r { v16.1d }, [x2], #8 // LD1R { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv1d_POST | ld1r { v24.1d }, [x21], x3 // LD1R { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv2d | ld1r { v18.2d }, [x0] // LD1R { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x18], #8 // LD1R { <Vt>.2D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x16], x28 // LD1R { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_IMM | ld1rb { z13.b }, p0/z, [x9] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_IMM | ld1rb { z30.b }, p6/z, [x21, #28] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_H_IMM | ld1rb { z10.h }, p1/z, [x9] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_H_IMM | ld1rb { z25.h }, p3/z, [x26, #6] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_S_IMM | ld1rb { z24.s }, p2/z, [x19] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_S_IMM | ld1rb { z16.s }, p1/z, [x8, #54] // LD1RB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_D_IMM | ld1rb { z17.d }, p7/z, [x4] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_D_IMM | ld1rb { z4.d }, p7/z, [x20, #18] // LD1RB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RD_IMM | ld1rd { z12.d }, p7/z, [x20] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RD_IMM | ld1rd { z19.d }, p5/z, [x13, #384] // LD1RD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_IMM | ld1rh { z13.h }, p7/z, [x0] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_IMM | ld1rh { z23.h }, p0/z, [x18, #56] // LD1RH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_S_IMM | ld1rh { z24.s }, p6/z, [x27] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_S_IMM | ld1rh { z6.s }, p7/z, [x1, #84] // LD1RH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_D_IMM | ld1rh { z3.d }, p4/z, [x25] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RH_D_IMM | ld1rh { z25.d }, p5/z, [x5, #108] // LD1RH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_B_IMM | ld1rqb { z31.b }, p1/z, [x6] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_B_IMM | ld1rqb { z21.b }, p7/z, [x29, #112] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_B | ld1rqb { z7.b }, p6/z, [x26, x26] // LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_D_IMM | ld1rqd { z10.d }, p0/z, [x28] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_D_IMM | ld1rqd { z29.d }, p5/z, [x6, #-16] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_D | ld1rqd { z5.d }, p6/z, [x7, x8, lsl #3] // LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_H_IMM | ld1rqh { z29.h }, p3/z, [x3] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_H_IMM | ld1rqh { z29.h }, p4/z, [x30, #112] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1RQ_H | ld1rqh { z9.h }, p0/z, [x23, x11, lsl #1] // LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load broadcast, scalar + scalar + S \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_W_IMM | ld1rqw { z11.s }, p0/z, [x26] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_W_IMM | ld1rqw { z7.s }, p3/z, [x16, #-80] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RQ_W | ld1rqw { z2.s }, p0/z, [x21, x23, lsl #2] // LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load broadcast, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_H_IMM | ld1rsb { z6.h }, p6/z, [x23] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_H_IMM | ld1rsb { z28.h }, p3/z, [x21, #43] // LD1RSB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_S_IMM | ld1rsb { z13.s }, p5/z, [x14] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_S_IMM | ld1rsb { z26.s }, p3/z, [x15, #4] // LD1RSB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_D_IMM | ld1rsb { z23.d }, p2/z, [x21] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSB_D_IMM | ld1rsb { z29.d }, p6/z, [x14, #25] // LD1RSB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_S_IMM | ld1rsh { z25.s }, p2/z, [x4] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_S_IMM | ld1rsh { z30.s }, p5/z, [x6, #124] // LD1RSH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_D_IMM | ld1rsh { z24.d }, p4/z, [x6] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSH_D_IMM | ld1rsh { z14.d }, p3/z, [x20, #98] // LD1RSH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSW_IMM | ld1rsw { z2.d }, p0/z, [x23] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RSW_IMM | ld1rsw { z18.d }, p7/z, [x11] // LD1RSW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_IMM | ld1rw { z12.s }, p7/z, [x9] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_IMM | ld1rw { z25.s }, p7/z, [x17, #60] // LD1RW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_D_IMM | ld1rw { z22.d }, p5/z, [x1] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RW_D_IMM | ld1rw { z2.d }, p3/z, [x3, #36] // LD1RW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H_IMM | ld1sb { z28.h }, p6/z, [x9] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H_IMM | ld1sb { z22.h }, p2/z, [x19, #7, mul vl] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S_IMM | ld1sb { z22.s }, p3/z, [x23] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S_IMM | ld1sb { z2.s }, p6/z, [x22, #-2, mul vl] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D_IMM | ld1sb { z31.d }, p6/z, [x10] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D_IMM | ld1sb { z23.d }, p5/z, [x2, #-4, mul vl] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H | ld1sb { z3.h }, p5/z, [x10, x23] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S | ld1sb { z16.s }, p7/z, [x27, x16] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D | ld1sb { z13.d }, p7/z, [x28, x18] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_D_UXTW | ld1sb { z30.d }, p6/z, [x22, z27.d, uxtw] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_S_UXTW | ld1sb { z23.s }, p5/z, [x17, z10.s, uxtw] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_D | ld1sb { z23.d }, p2/z, [x28, z10.d] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SB_S_IMM | ld1sb { z14.s }, p4/z, [z18.s, #24] // LD1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_D_IMM | ld1sb { z5.d }, p0/z, [z25.d, #31] // LD1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_S_IMM | ld1sh { z8.s }, p3/z, [x21] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_S_IMM | ld1sh { z29.s }, p4/z, [x11, #-4, mul vl] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_D_IMM | ld1sh { z13.d }, p6/z, [x18] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_D_IMM | ld1sh { z19.d }, p2/z, [x29, #-3, mul vl] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1SH_S | ld1sh { z28.s }, p0/z, [x6, x28, lsl #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1SH_D | ld1sh { z26.d }, p0/z, [x7, x12, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_S_UXTW_SCALED | ld1sh { z22.s }, p3/z, [x7, z1.s, uxtw #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_D_SXTW_SCALED | ld1sh { z3.d }, p6/z, [x11, z14.d, sxtw #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_D_SXTW | ld1sh { z27.d }, p3/z, [x19, z23.d, sxtw] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_S_SXTW | ld1sh { z12.s }, p5/z, [x27, z13.s, sxtw] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_D_SCALED | ld1sh { z9.d }, p0/z, [x22, z8.d, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_D | ld1sh { z22.d }, p0/z, [x27, z12.d] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_S_IMM | ld1sh { z1.s }, p2/z, [z9.s, #44] // LD1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_D_IMM | ld1sh { z11.d }, p5/z, [z30.d, #34] // LD1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D_IMM | ld1sw { z7.d }, p1/z, [x19] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D_IMM | ld1sw { z28.d }, p1/z, [x26, #4, mul vl] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D | ld1sw { z26.d }, p4/z, [x20, x17, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SW_D_SXTW_SCALED | ld1sw { z22.d }, p1/z, [x14, z23.d, sxtw #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D_SXTW | ld1sw { z4.d }, p3/z, [x20, z15.d, sxtw] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SW_D_SCALED | ld1sw { z1.d }, p4/z, [x20, z23.d, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D | ld1sw { z2.d }, p7/z, [x4, z0.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D_IMM | ld1sw { z12.d }, p7/z, [z21.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D_IMM | ld1sw { z27.d }, p3/z, [z10.d, #24] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_SXTW_SCALED | ld1w { z9.s }, p0/z, [x18, z9.s, sxtw #2] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_D_UXTW_SCALED | ld1w { z14.d }, p5/z, [x26, z2.d, uxtw #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D_UXTW | ld1w { z31.d }, p6/z, [x17, z2.d, uxtw] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_SXTW | ld1w { z14.s }, p2/z, [x18, z28.s, sxtw] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_D_SCALED | ld1w { z13.d }, p3/z, [x5, z11.d, lsl #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D | ld1w { z24.d }, p3/z, [x2, z17.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_IMM | ld1w { z4.s }, p0/z, [z1.s] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_IMM | ld1w { z17.s }, p6/z, [z26.s, #60] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D_IMM | ld1w { z31.d }, p7/z, [z22.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D_IMM | ld1w { z2.d }, p3/z, [z6.d, #116] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD2Twov8b | ld2 { v13.8b, v14.8b }, [x4] // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov8b_POST | ld2 { v20.8b, v21.8b }, [x11], #16 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov8b_POST | ld2 { v13.8b, v14.8b }, [x4], x7 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov16b | ld2 { v26.16b, v27.16b }, [x16] // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v15.16b, v16.16b }, [x3], #32 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v24.16b, v25.16b }, [x7], x30 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD2Twov4h | ld2 { v0.4h, v1.4h }, [x21] // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x30], #16 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x22], x1 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov8h | ld2 { v8.8h, v9.8h }, [x28] // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v14.8h, v15.8h }, [x19], #32 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v28.8h, v29.8h }, [x26], x7 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD2Twov2s | ld2 { v2.2s, v3.2s }, [x16] // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov2s_POST | ld2 { v23.2s, v24.2s }, [x5], #16 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov2s_POST | ld2 { v22.2s, v23.2s }, [x11], x12 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov4s | ld2 { v22.4s, v23.4s }, [x4] // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v27.4s, v28.4s }, [x18], #32 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v22.4s, v23.4s }, [x26], x29 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov2d | ld2 { v22.2d, v23.2d }, [x17] // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v12.2d, v13.2d }, [x19], #32 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v6.2d, v7.2d }, [x11], x24 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i8 | ld2 { v29.b, v30.b }[3], [x1] // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i16 | ld2 { v23.h, v24.h }[7], [x14] // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i32 | ld2 { v26.s, v27.s }[1], [x17] // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i64 | ld2 { v1.d, v2.d }[0], [x10] // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i8_POST | ld2 { v20.b, v21.b }[9], [x24], #2 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i8_POST | ld2 { v29.b, v30.b }[6], [x18], x19 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i16_POST | ld2 { v2.h, v3.h }[3], [x12], #4 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i16_POST | ld2 { v11.h, v12.h }[3], [x18], x17 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i32_POST | ld2 { v15.s, v16.s }[1], [x7], #8 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i32_POST | ld2 { v29.s, v30.s }[1], [x12], x0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i64_POST | ld2 { v1.d, v2.d }[1], [x3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i64_POST | ld2 { v10.d, v11.d }[1], [x18], x27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B_IMM | ld2b { z9.b, z10.b }, p2/z, [x22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B_IMM | ld2b { z28.b, z29.b }, p3/z, [x22, #4, mul vl] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B | ld2b { z26.b, z27.b }, p1/z, [x3, x12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D_IMM | ld2d { z12.d, z13.d }, p5/z, [x24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D_IMM | ld2d { z22.d, z23.d }, p2/z, [x21, #-2, mul vl] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D | ld2d { z22.d, z23.d }, p6/z, [x14, x4, lsl #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H_IMM | ld2h { z5.h, z6.h }, p5/z, [x20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H_IMM | ld2h { z27.h, z28.h }, p7/z, [x11, #14, mul vl] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 10 | 10 | 1.00 | V1UnitI[2], V1UnitL[2], V1UnitL01[2], V1UnitS[2], V1UnitV[2], V1UnitV01[2] | LD2H | ld2h { z18.h, z19.h }, p3/z, [x9, x17, lsl #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 10 10 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitS[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv8b | ld2r { v10.8b, v11.8b }, [x20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8b_POST | ld2r { v18.8b, v19.8b }, [x11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8b_POST | ld2r { v28.8b, v29.8b }, [x30], x14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv16b | ld2r { v10.16b, v11.16b }, [x23] // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv16b_POST | ld2r { v24.16b, v25.16b }, [x1], #2 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv16b_POST | ld2r { v20.16b, v21.16b }, [x11], x7 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv4h | ld2r { v25.4h, v26.4h }, [x11] // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4h_POST | ld2r { v28.4h, v29.4h }, [x18], #4 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4h_POST | ld2r { v21.4h, v22.4h }, [x2], x17 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv8h | ld2r { v23.8h, v24.8h }, [x10] // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8h_POST | ld2r { v19.8h, v20.8h }, [x29], #4 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8h_POST | ld2r { v13.8h, v14.8h }, [x13], x5 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv2s | ld2r { v25.2s, v26.2s }, [x19] // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2s_POST | ld2r { v5.2s, v6.2s }, [x28], #8 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2s_POST | ld2r { v4.2s, v5.2s }, [x14], x19 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv4s | ld2r { v8.4s, v9.4s }, [x17] // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4s_POST | ld2r { v22.4s, v23.4s }, [x5], #8 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4s_POST | ld2r { v29.4s, v30.4s }, [x4], x18 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv1d | ld2r { v9.1d, v10.1d }, [x25] // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv1d_POST | ld2r { v15.1d, v16.1d }, [x26], #16 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv1d_POST | ld2r { v10.1d, v11.1d }, [x28], x26 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv2d | ld2r { v26.2d, v27.2d }, [x8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2d_POST | ld2r { v14.2d, v15.2d }, [x3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2d_POST | ld2r { v24.2d, v25.2d }, [x6], x14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W_IMM | ld2w { z21.s, z22.s }, p4/z, [x12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W_IMM | ld2w { z29.s, z30.s }, p2/z, [x19, #6, mul vl] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W | ld2w { z18.s, z19.s }, p6/z, [x22, x22, lsl #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev8b | ld3 { v8.8b, v9.8b, v10.8b }, [x0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8b_POST | ld3 { v6.8b, v7.8b, v8.8b }, [x26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8b_POST | ld3 { v20.8b, v21.8b, v22.8b }, [x25], x24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev16b | ld3 { v15.16b, v16.16b, v17.16b }, [x5] // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v19.16b, v20.16b, v21.16b }, [x3], #48 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v26.16b, v27.16b, v28.16b }, [x8], x29 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev4h | ld3 { v15.4h, v16.4h, v17.4h }, [x8] // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4h_POST | ld3 { v4.4h, v5.4h, v6.4h }, [x5], #24 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4h_POST | ld3 { v24.4h, v25.4h, v26.4h }, [x25], x0 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev8h | ld3 { v7.8h, v8.8h, v9.8h }, [x21] // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v4.8h, v5.8h, v6.8h }, [x26], #48 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v12.8h, v13.8h, v14.8h }, [x0], x25 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev2s | ld3 { v16.2s, v17.2s, v18.2s }, [x0] // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2s_POST | ld3 { v9.2s, v10.2s, v11.2s }, [x1], #24 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2s_POST | ld3 { v27.2s, v28.2s, v29.2s }, [x23], x4 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev4s | ld3 { v12.4s, v13.4s, v14.4s }, [x25] // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v12.4s, v13.4s, v14.4s }, [x27], #48 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v2.4s, v3.4s, v4.4s }, [x22], x21 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev2d | ld3 { v10.2d, v11.2d, v12.2d }, [x18] // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v25.2d, v26.2d, v27.2d }, [x4], #48 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v6.2d, v7.2d, v8.2d }, [x10], x24 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i8 | ld3 { v17.b, v18.b, v19.b }[2], [x27] // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i16 | ld3 { v18.h, v19.h, v20.h }[5], [x16] // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i32 | ld3 { v1.s, v2.s, v3.s }[3], [x14] // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i64 | ld3 { v5.d, v6.d, v7.d }[1], [x14] // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i8_POST | ld3 { v16.b, v17.b, v18.b }[3], [x15], #3 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i8_POST | ld3 { v14.b, v15.b, v16.b }[4], [x23], x6 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i16_POST | ld3 { v11.h, v12.h, v13.h }[1], [x28], #6 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i16_POST | ld3 { v4.h, v5.h, v6.h }[2], [x5], x15 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i32_POST | ld3 { v26.s, v27.s, v28.s }[0], [x14], #12 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i32_POST | ld3 { v1.s, v2.s, v3.s }[0], [x26], x20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i64_POST | ld3 { v14.d, v15.d, v16.d }[1], [x30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i64_POST | ld3 { v23.d, v24.d, v25.d }[0], [x24], x14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3B_IMM | ld3b { z29.b - z31.b }, p3/z, [x17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3B_IMM | ld3b { z23.b - z25.b }, p7/z, [x12, #18, mul vl] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3B | ld3b { z23.b - z25.b }, p3/z, [x12, x12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3D_IMM | ld3d { z20.d - z22.d }, p2/z, [x6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3D_IMM | ld3d { z1.d - z3.d }, p2/z, [x9, #-15, mul vl] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3D | ld3d { z13.d - z15.d }, p6/z, [x27, x30, lsl #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3H_IMM | ld3h { z26.h - z28.h }, p1/z, [x29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3H_IMM | ld3h { z14.h - z16.h }, p3/z, [x18, #9, mul vl] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3H | ld3h { z5.h - z7.h }, p3/z, [x6, x21, lsl #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv8b | ld3r { v24.8b, v25.8b, v26.8b }, [x10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8b_POST | ld3r { v14.8b, v15.8b, v16.8b }, [x11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8b_POST | ld3r { v22.8b, v23.8b, v24.8b }, [x0], x11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv16b | ld3r { v17.16b, v18.16b, v19.16b }, [x3] // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv16b_POST | ld3r { v7.16b, v8.16b, v9.16b }, [x29], #3 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv16b_POST | ld3r { v3.16b, v4.16b, v5.16b }, [x20], x5 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv4h | ld3r { v3.4h, v4.4h, v5.4h }, [x1] // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4h_POST | ld3r { v8.4h, v9.4h, v10.4h }, [x3], #6 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4h_POST | ld3r { v4.4h, v5.4h, v6.4h }, [x0], x28 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv8h | ld3r { v6.8h, v7.8h, v8.8h }, [x28] // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8h_POST | ld3r { v4.8h, v5.8h, v6.8h }, [x11], #6 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8h_POST | ld3r { v3.8h, v4.8h, v5.8h }, [x17], x0 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv2s | ld3r { v18.2s, v19.2s, v20.2s }, [x24] // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2s_POST | ld3r { v8.2s, v9.2s, v10.2s }, [x22], #12 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2s_POST | ld3r { v12.2s, v13.2s, v14.2s }, [x0], x14 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv4s | ld3r { v28.4s, v29.4s, v30.4s }, [x2] // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4s_POST | ld3r { v21.4s, v22.4s, v23.4s }, [x22], #12 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4s_POST | ld3r { v28.4s, v29.4s, v30.4s }, [x13], x25 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv1d | ld3r { v1.1d, v2.1d, v3.1d }, [x28] // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv1d_POST | ld3r { v0.1d, v1.1d, v2.1d }, [x7], #24 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv1d_POST | ld3r { v22.1d, v23.1d, v24.1d }, [x9], x15 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv2d | ld3r { v8.2d, v9.2d, v10.2d }, [x3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2d_POST | ld3r { v3.2d, v4.2d, v5.2d }, [x25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2d_POST | ld3r { v8.2d, v9.2d, v10.2d }, [x18], x13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3W_IMM | ld3w { z23.s - z25.s }, p1/z, [x8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3W_IMM | ld3w { z6.s - z8.s }, p4/z, [x0, #18, mul vl] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3W | ld3w { z27.s - z29.s }, p3/z, [x3, x6, lsl #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Fourv8b | ld4 { v6.8b, v7.8b, v8.8b, v9.8b }, [x27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv8b_POST | ld4 { v20.8b, v21.8b, v22.8b, v23.8b }, [x10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv8b_POST | ld4 { v18.8b, v19.8b, v20.8b, v21.8b }, [x24], x11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv16b | ld4 { v11.16b, v12.16b, v13.16b, v14.16b }, [x5] // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv16b_POST | ld4 { v10.16b, v11.16b, v12.16b, v13.16b }, [x12], #64 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv16b_POST | ld4 { v12.16b, v13.16b, v14.16b, v15.16b }, [x4], x17 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Fourv4h | ld4 { v21.4h, v22.4h, v23.4h, v24.4h }, [x14] // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv4h_POST | ld4 { v10.4h, v11.4h, v12.4h, v13.4h }, [x19], #32 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv4h_POST | ld4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x15], x17 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv8h | ld4 { v9.8h, v10.8h, v11.8h, v12.8h }, [x1] // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv8h_POST | ld4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x0], #64 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv8h_POST | ld4 { v4.8h, v5.8h, v6.8h, v7.8h }, [x17], x17 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Fourv2s | ld4 { v23.2s, v24.2s, v25.2s, v26.2s }, [x24] // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv2s_POST | ld4 { v25.2s, v26.2s, v27.2s, v28.2s }, [x3], #32 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv2s_POST | ld4 { v22.2s, v23.2s, v24.2s, v25.2s }, [x14], x15 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv4s | ld4 { v17.4s, v18.4s, v19.4s, v20.4s }, [x4] // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv4s_POST | ld4 { v25.4s, v26.4s, v27.4s, v28.4s }, [x19], #64 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv4s_POST | ld4 { v4.4s, v5.4s, v6.4s, v7.4s }, [x28], x3 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv2d | ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x24] // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, D \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv2d_POST | ld4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x0], #64 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv2d_POST | ld4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x27], x4 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i8 | ld4 { v4.b, v5.b, v6.b, v7.b }[12], [x27] // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i16 | ld4 { v5.h, v6.h, v7.h, v8.h }[0], [x4] // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i32 | ld4 { v0.s, v1.s, v2.s, v3.s }[0], [x26] // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i64 | ld4 { v2.d, v3.d, v4.d, v5.d }[0], [x29] // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i8_POST | ld4 { v26.b, v27.b, v28.b, v29.b }[4], [x13], #4 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i8_POST | ld4 { v10.b, v11.b, v12.b, v13.b }[11], [x24], x21 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i16_POST | ld4 { v8.h, v9.h, v10.h, v11.h }[0], [x17], #8 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i16_POST | ld4 { v21.h, v22.h, v23.h, v24.h }[2], [x21], x24 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i32_POST | ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x28], #16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i32_POST | ld4 { v20.s, v21.s, v22.s, v23.s }[1], [x27], x16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i64_POST | ld4 { v18.d, v19.d, v20.d, v21.d }[1], [x26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i64_POST | ld4 { v8.d, v9.d, v10.d, v11.d }[0], [x23], x0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4B_IMM | ld4b { z16.b - z19.b }, p3/z, [x23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4B_IMM | ld4b { z7.b - z10.b }, p5/z, [x3, #12, mul vl] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4B | ld4b { z7.b - z10.b }, p4/z, [x20, x12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4D_IMM | ld4d { z26.d - z29.d }, p7/z, [x10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4D_IMM | ld4d { z27.d - z30.d }, p0/z, [x6, #24, mul vl] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4D | ld4d { z7.d - z10.d }, p4/z, [x25, x8, lsl #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4H_IMM | ld4h { z4.h - z7.h }, p4/z, [x19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4H_IMM | ld4h { z4.h - z7.h }, p1/z, [x16, #-8, mul vl] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4H | ld4h { z10.h - z13.h }, p2/z, [x8, x28, lsl #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv8b | ld4r { v20.8b, v21.8b, v22.8b, v23.8b }, [x23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8b_POST | ld4r { v24.8b, v25.8b, v26.8b, v27.8b }, [x15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8b_POST | ld4r { v4.8b, v5.8b, v6.8b, v7.8b }, [x26], x6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv16b | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x25] // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv16b_POST | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x14], #4 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv16b_POST | ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x29], x11 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv4h | ld4r { v16.4h, v17.4h, v18.4h, v19.4h }, [x6] // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4h_POST | ld4r { v14.4h, v15.4h, v16.4h, v17.4h }, [x0], #8 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4h_POST | ld4r { v21.4h, v22.4h, v23.4h, v24.4h }, [x25], x22 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv8h | ld4r { v4.8h, v5.8h, v6.8h, v7.8h }, [x23] // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8h_POST | ld4r { v25.8h, v26.8h, v27.8h, v28.8h }, [x7], #8 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8h_POST | ld4r { v13.8h, v14.8h, v15.8h, v16.8h }, [x19], x27 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv2s | ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x30] // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2s_POST | ld4r { v23.2s, v24.2s, v25.2s, v26.2s }, [x29], #16 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2s_POST | ld4r { v19.2s, v20.2s, v21.2s, v22.2s }, [x9], x0 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv4s | ld4r { v7.4s, v8.4s, v9.4s, v10.4s }, [x23] // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4s_POST | ld4r { v9.4s, v10.4s, v11.4s, v12.4s }, [x3], #16 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4s_POST | ld4r { v3.4s, v4.4s, v5.4s, v6.4s }, [x10], x22 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv1d | ld4r { v7.1d, v8.1d, v9.1d, v10.1d }, [x26] // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv1d_POST | ld4r { v11.1d, v12.1d, v13.1d, v14.1d }, [x5], #32 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv1d_POST | ld4r { v12.1d, v13.1d, v14.1d, v15.1d }, [x30], x17 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv2d | ld4r { v7.2d, v8.2d, v9.2d, v10.2d }, [x8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2d_POST | ld4r { v12.2d, v13.2d, v14.2d, v15.2d }, [x2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2d_POST | ld4r { v17.2d, v18.2d, v19.2d, v20.2d }, [x21], x13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4W_IMM | ld4w { z18.s - z21.s }, p6/z, [x4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4W_IMM | ld4w { z21.s - z24.s }, p5/z, [x16, #-8, mul vl] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4W | ld4w { z25.s - z28.s }, p2/z, [x23, x8, lsl #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURi | ldapur w7, [x24] // LDAPUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURi | ldapur w25, [x29, #68] // LDAPUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURXi | ldapur x20, [x13] // LDAPUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURXi | ldapur x29, [x4, #-199] // LDAPUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURBi | ldapurb w13, [x17] // LDAPURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURBi | ldapurb w20, [x19, #124] // LDAPURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURHi | ldapurh w3, [x22] // LDAPURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURHi | ldapurh w1, [x6, #113] // LDAPURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBWi | ldapursb w7, [x8] // LDAPURSB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBWi | ldapursb w29, [x22, #-76] // LDAPURSB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBXi | ldapursb x29, [x7] // LDAPURSB <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSBXi | ldapursb x6, [x0, #-254] // LDAPURSB <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHWi | ldapursh w17, [x19] // LDAPURSH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHWi | ldapursh w26, [x18, #-114] // LDAPURSH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHXi | ldapursh x3, [x3] // LDAPURSH <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSHXi | ldapursh x13, [x25, #30] // LDAPURSH <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSWi | ldapursw x3, [x18] // LDAPURSW <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURSWi | ldapursw x21, [x25] // LDAPURSW <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARW | ldar w9, [x20] // LDAR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARW | ldar w15, [x0] // LDAR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARX | ldar x5, [x25] // LDAR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARX | ldar x11, [x2] // LDAR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARB | ldarb w16, [x21] // LDARB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARB | ldarb w14, [x30] // LDARB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARH | ldarh w26, [x25] // LDARH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDARH | ldarh w21, [x2] // LDARH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPW | ldaxp w13, w22, [x28] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPW | ldaxp w11, w19, [x20] // LDAXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPX | ldaxp x25, x8, [x16] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDAXPX | ldaxp x28, x17, [x25] // LDAXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRW | ldaxr w4, [x5] // LDAXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRW | ldaxr w10, [x7] // LDAXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRX | ldaxr x22, [x21] // LDAXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRX | ldaxr x7, [x1] // LDAXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRB | ldaxrb w12, [x30] // LDAXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRB | ldaxrb w27, [x2] // LDAXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRH | ldaxrh w30, [x16] // LDAXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDAXRH | ldaxrh w14, [x3] // LDAXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B | ldff1b { z10.b }, p3/z, [x10] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B | ldff1b { z2.b }, p5/z, [x28, x2] // LDFF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_H | ldff1b { z2.h }, p0/z, [x14] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_H | ldff1b { z30.h }, p3/z, [x25, x18] // LDFF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_S | ldff1b { z17.s }, p5/z, [x24] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_S | ldff1b { z17.s }, p7/z, [x11, x15] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_D | ldff1b { z9.d }, p2/z, [x3] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_D | ldff1b { z5.d }, p2/z, [x6, x8] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D_SXTW | ldff1b { z7.d }, p3/z, [x27, z19.d, sxtw] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_S_SXTW | ldff1b { z13.s }, p3/z, [x24, z25.s, sxtw] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D | ldff1b { z27.d }, p0/z, [x13, z16.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1B_S_IMM | ldff1b { z7.s }, p7/z, [z16.s] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1B_S_IMM | ldff1b { z11.s }, p5/z, [z8.s, #25] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D_IMM | ldff1b { z2.d }, p7/z, [z19.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D_IMM | ldff1b { z3.d }, p5/z, [z0.d, #11] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1D | ldff1d { z21.d }, p2/z, [x20] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1D | ldff1d { z9.d }, p3/z, [x28, x30, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1D_SXTW_SCALED | ldff1d { z21.d }, p4/z, [x11, z12.d, sxtw #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D_UXTW | ldff1d { z6.d }, p4/z, [x15, z1.d, uxtw] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1D_SCALED | ldff1d { z12.d }, p7/z, [x11, z28.d, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D | ldff1d { z26.d }, p4/z, [x30, z5.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D_IMM | ldff1d { z10.d }, p5/z, [z10.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D_IMM | ldff1d { z21.d }, p6/z, [z3.d, #48] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H | ldff1h { z14.h }, p3/z, [x22] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H | ldff1h { z15.h }, p2/z, [x24, x8, lsl #1] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_S | ldff1h { z23.s }, p0/z, [x12] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_S | ldff1h { z18.s }, p0/z, [x7, x25, lsl #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_D | ldff1h { z16.d }, p0/z, [x11] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_D | ldff1h { z25.d }, p3/z, [x24, x19, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_S_SXTW_SCALED | ldff1h { z9.s }, p2/z, [x3, z24.s, sxtw #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_D_UXTW_SCALED | ldff1h { z7.d }, p0/z, [x8, z17.d, uxtw #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D_SXTW | ldff1h { z9.d }, p5/z, [x4, z10.d, sxtw] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_S_UXTW | ldff1h { z4.s }, p4/z, [x6, z27.s, uxtw] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_D_SCALED | ldff1h { z25.d }, p1/z, [x29, z6.d, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D | ldff1h { z10.d }, p7/z, [x1, z26.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_S_IMM | ldff1h { z4.s }, p1/z, [z27.s] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_S_IMM | ldff1h { z5.s }, p3/z, [z8.s, #62] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D_IMM | ldff1h { z16.d }, p5/z, [z10.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D_IMM | ldff1h { z15.d }, p2/z, [z19.d, #34] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_H | ldff1sb { z0.h }, p2/z, [x2] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_H | ldff1sb { z29.h }, p1/z, [x16, x21] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_S | ldff1sb { z20.s }, p7/z, [x8] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_S | ldff1sb { z8.s }, p2/z, [x4, x14] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_D | ldff1sb { z11.d }, p4/z, [x6] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_D | ldff1sb { z17.d }, p4/z, [x16, x10] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D_SXTW | ldff1sb { z13.d }, p2/z, [x28, z8.d, sxtw] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_S_SXTW | ldff1sb { z3.s }, p2/z, [x26, z24.s, sxtw] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D | ldff1sb { z10.d }, p7/z, [x20, z6.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SB_S_IMM | ldff1sb { z18.s }, p3/z, [z9.s] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SB_S_IMM | ldff1sb { z25.s }, p2/z, [z29.s, #25] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D_IMM | ldff1sb { z8.d }, p0/z, [z24.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D_IMM | ldff1sb { z7.d }, p0/z, [z4.d, #9] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_S | ldff1sh { z2.s }, p2/z, [x6] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_S | ldff1sh { z9.s }, p3/z, [x30, x16, lsl #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_D | ldff1sh { z7.d }, p4/z, [x30] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_D | ldff1sh { z1.d }, p0/z, [x29, x0, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_S_SXTW_SCALED | ldff1sh { z25.s }, p4/z, [x5, z9.s, sxtw #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_D_SXTW_SCALED | ldff1sh { z17.d }, p3/z, [x0, z25.d, sxtw #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D_SXTW | ldff1sh { z12.d }, p7/z, [x5, z15.d, sxtw] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_S_UXTW | ldff1sh { z8.s }, p5/z, [x3, z21.s, uxtw] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_D_SCALED | ldff1sh { z14.d }, p6/z, [x17, z27.d, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D | ldff1sh { z23.d }, p4/z, [x22, z0.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_S_IMM | ldff1sh { z6.s }, p4/z, [z6.s] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_S_IMM | ldff1sh { z3.s }, p7/z, [z26.s, #16] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D_IMM | ldff1sh { z25.d }, p3/z, [z17.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D_IMM | ldff1sh { z2.d }, p3/z, [z31.d, #26] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SW_D | ldff1sw { z16.d }, p2/z, [x8] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SW_D | ldff1sw { z27.d }, p1/z, [x6, x11, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SW_D_UXTW_SCALED | ldff1sw { z27.d }, p3/z, [x5, z20.d, uxtw #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D_SXTW | ldff1sw { z15.d }, p1/z, [x13, z26.d, sxtw] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SW_D_SCALED | ldff1sw { z24.d }, p2/z, [x7, z23.d, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D | ldff1sw { z8.d }, p3/z, [x5, z22.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D_IMM | ldff1sw { z16.d }, p6/z, [z12.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D_IMM | ldff1sw { z3.d }, p1/z, [z13.d, #60] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W | ldff1w { z2.s }, p5/z, [x13] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W | ldff1w { z9.s }, p3/z, [x16, x19, lsl #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W_D | ldff1w { z31.d }, p6/z, [x3] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W_D | ldff1w { z30.d }, p4/z, [x25, x12, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_UXTW_SCALED | ldff1w { z27.s }, p6/z, [x10, z17.s, uxtw #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_D_SXTW_SCALED | ldff1w { z8.d }, p4/z, [x28, z31.d, sxtw #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D_UXTW | ldff1w { z1.d }, p0/z, [x23, z14.d, uxtw] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_UXTW | ldff1w { z17.s }, p5/z, [x8, z6.s, uxtw] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_D_SCALED | ldff1w { z19.d }, p3/z, [x7, z18.d, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D | ldff1w { z23.d }, p2/z, [x16, z4.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_IMM | ldff1w { z24.s }, p6/z, [z24.s] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_IMM | ldff1w { z20.s }, p0/z, [z6.s, #36] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D_IMM | ldff1w { z21.d }, p5/z, [z12.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D_IMM | ldff1w { z29.d }, p2/z, [z11.d, #40] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_IMM | ldnf1b { z17.b }, p5/z, [x20] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_IMM | ldnf1b { z8.b }, p5/z, [x26, #1, mul vl] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_H_IMM | ldnf1b { z4.h }, p3/z, [x25] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_H_IMM | ldnf1b { z31.h }, p3/z, [x7] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_S_IMM | ldnf1b { z2.s }, p7/z, [x25] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_S_IMM | ldnf1b { z17.s }, p5/z, [x29, #2, mul vl] // LDNF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_D_IMM | ldnf1b { z6.d }, p5/z, [x26] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_D_IMM | ldnf1b { z18.d }, p4/z, [x20, #5, mul vl] // LDNF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1D_IMM | ldnf1d { z5.d }, p6/z, [x6] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1D_IMM | ldnf1d { z19.d }, p0/z, [x15, #-1, mul vl] // LDNF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_IMM | ldnf1h { z7.h }, p5/z, [x22] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_IMM | ldnf1h { z27.h }, p1/z, [x2, #6, mul vl] // LDNF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_S_IMM | ldnf1h { z18.s }, p2/z, [x13] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_S_IMM | ldnf1h { z8.s }, p2/z, [x29, #-8, mul vl] // LDNF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_D_IMM | ldnf1h { z26.d }, p5/z, [x5] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1H_D_IMM | ldnf1h { z20.d }, p0/z, [x29, #-6, mul vl] // LDNF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_H_IMM | ldnf1sb { z17.h }, p0/z, [x23] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_H_IMM | ldnf1sb { z14.h }, p0/z, [x18, #-5, mul vl] // LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_S_IMM | ldnf1sb { z23.s }, p0/z, [x3] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_S_IMM | ldnf1sb { z13.s }, p7/z, [x15, #-8, mul vl] // LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_D_IMM | ldnf1sb { z14.d }, p4/z, [x7] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SB_D_IMM | ldnf1sb { z13.d }, p7/z, [x25, #6, mul vl] // LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_S_IMM | ldnf1sh { z28.s }, p4/z, [x9] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_S_IMM | ldnf1sh { z3.s }, p1/z, [x14, #-2, mul vl] // LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_D_IMM | ldnf1sh { z1.d }, p2/z, [x0] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SH_D_IMM | ldnf1sh { z14.d }, p3/z, [x8, #3, mul vl] // LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SW_D_IMM | ldnf1sw { z8.d }, p4/z, [x9] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1SW_D_IMM | ldnf1sw { z28.d }, p4/z, [x13, #-7, mul vl] // LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_IMM | ldnf1w { z15.s }, p5/z, [x27] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_IMM | ldnf1w { z28.s }, p0/z, [x28, #-1, mul vl] // LDNF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_D_IMM | ldnf1w { z28.d }, p5/z, [x13] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1W_D_IMM | ldnf1w { z4.d }, p0/z, [x12, #2, mul vl] // LDNF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPSi | ldnp s1, s13, [x4] // LDNP <St1>, <St2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPSi | ldnp s30, s5, [x11, #-184] // LDNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPDi | ldnp d3, d12, [x21] // LDNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDNPDi | ldnp d12, d5, [x7, #-424] // LDNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LDNPQi | ldnp q0, q14, [x24] // LDNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Load vector pair, immed offset, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LDNPQi | ldnp q4, q1, [x27, #80] // LDNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDNPWi | ldnp w4, w20, [x25] // LDNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDNPWi | ldnp w30, w4, [x21, #-196] // LDNP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitL[3] | LDNPXi | ldnp x7, x30, [x18] // LDNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitL[3] | LDNPXi | ldnp x5, x19, [x1, #-240] // LDNP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1B_ZRI | ldnt1b { z9.b }, p2/z, [x21] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1B_ZRI | ldnt1b { z30.b }, p5/z, [x30, #-3, mul vl] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1B_ZRR | ldnt1b { z10.b }, p5/z, [x12, x17] // LDNT1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1D_ZRI | ldnt1d { z27.d }, p2/z, [x12] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1D_ZRI | ldnt1d { z5.d }, p7/z, [x22, #6, mul vl] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1D_ZRR | ldnt1d { z28.d }, p2/z, [x14, x0, lsl #3] // LDNT1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1H_ZRI | ldnt1h { z11.h }, p0/z, [x21] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1H_ZRI | ldnt1h { z19.h }, p1/z, [x24, #-5, mul vl] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1H_ZRR | ldnt1h { z27.h }, p0/z, [x22, x24, lsl #1] // LDNT1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1W_ZRI | ldnt1w { z27.s }, p4/z, [x19] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNT1W_ZRI | ldnt1w { z15.s }, p0/z, [x22, #3, mul vl] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDNT1W_ZRR | ldnt1w { z25.s }, p4/z, [x12, x21, lsl #2] // LDNT1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPSpost | ldp s19, s15, [x24], #-64 // LDP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPDpost | ldp d9, d1, [x20], #296 // LDP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Load vector pair, immed post-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LDPQpost | ldp q18, q24, [x11], #144 // LDP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Load vector pair, immed post-index, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPSpre | ldp s10, s30, [x0, #-4]! // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDPDpre | ldp d26, d11, [x16, #-304]! // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Load vector pair, immed pre-index, S/D-form \\ 2 6 6 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 1.50 | V1UnitI, V1UnitL[2] | LDPQpre | ldp q18, q12, [x25, #960]! // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Load vector pair, immed pre-index, Q-form \\ 2 6 6 1.5 V1UnitL[2],V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDPSi | ldp s12, s31, [x20, #-192] // LDP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDPDi | ldp d26, d6, [x22, #-144] // LDP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Load vector pair, immed offset, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 1.50 | V1UnitL[2] | LDPQi | ldp q5, q19, [x9, #-448] // LDP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Load vector pair, immed offset, Q-form \\ 1 6 6 1.5 V1UnitL[2]
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDPWpost | ldp w10, w18, [x16], #-96 // LDP <Wt1>, <Wt2>, [<Xn|SP>], #<imm32> \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[3] | LDPXpost | ldp x13, x16, [x11], #288 // LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm64> \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 2 4 4 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDPWpre | ldp w7, w16, [x13, #-116]! // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>]! \\ Load pair, immed post-index or immed pre-index, normal, W-form \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[3] | LDPXpre | ldp x26, x3, [x14, #16]! // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>]! \\ Load pair, immed post-index or immed pre-index, normal, X-form \\ 2 4 4 1.0 V1UnitL[3],V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDPWi | ldp w25, w23, [x22] // LDP <Wt1>, <Wt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDPWi | ldp w3, w21, [x17, #40] // LDP <Wt1>, <Wt2>, [<Xn|SP>, #<imm32>] \\ Load pair, signed immed offset, normal, W-form \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitL[3] | LDPXi | ldp x6, x25, [x17] // LDP <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitL[3] | LDPXi | ldp x9, x21, [x3, #104] // LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm64>] \\ Load pair, signed immed offset, normal, X-form \\ 1 4 4 1.0 V1UnitL[3]
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitL[3] | LDPSWpost | ldpsw x23, x26, [x30], #-160 // LDPSW <Xt1>, <Xt2>, [<Xn|SP>], #<imm> \\ Load pair, immed post-index or immed pre-index, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitL[3] | LDPSWpre | ldpsw x19, x28, [x21, #-248]! // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! \\ Load pair, immed post-index or immed pre-index, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitL[3] | LDPSWi | ldpsw x13, x20, [x15] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitL[3] | LDPSWi | ldpsw x9, x27, [x8, #80] // LDPSW <Xt1>, <Xt2>, [<Xn|SP>, #<imm>] \\ Load pair, signed immed offset, signed words \\ 2 5 5 1.0 V1UnitI,V1UnitL[3]
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRWpost | ldr w13, [x2], #-22 // LDR <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRXpost | ldr x6, [x9], #248 // LDR <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRWpre | ldr w20, [x10, #13]! // LDR <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRXpre | ldr x23, [x20, #-24]! // LDR <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWui | ldr w19, [x15, #11620] // LDR <Wt>, [<Xn|SP>, #<pimm32>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXui | ldr x2, [x13, #18528] // LDR <Xt>, [<Xn|SP>, #<pimm64>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRBpost | ldr b0, [x15], #-18 // LDR <Bt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRHpost | ldr h25, [x4], #-156 // LDR <Ht>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRSpost | ldr s28, [x6], #162 // LDR <St>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRDpost | ldr d23, [x8], #-176 // LDR <Dt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRQpost | ldr q5, [x18], #70 // LDR <Qt>, [<Xn|SP>], #<simm> \\ Load vector reg, immed post-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRBpre | ldr b9, [x0, #-104]! // LDR <Bt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRHpre | ldr h24, [x10, #34]! // LDR <Ht>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRSpre | ldr s29, [x5, #168]! // LDR <St>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRDpre | ldr d22, [x9, #-1]! // LDR <Dt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LDRQpre | ldr q27, [x20, #-204]! // LDR <Qt>, [<Xn|SP>, #<simm>]! \\ Load vector reg, immed pre-index \\ 2 6 6 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBui | ldr b23, [x0, #349] // LDR <Bt>, [<Xn|SP>, #<pimmb>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRHui | ldr h1, [x15, #3540] // LDR <Ht>, [<Xn|SP>, #<pimmh>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSui | ldr s14, [x7, #16208] // LDR <St>, [<Xn|SP>, #<pimms>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDui | ldr d4, [x17, #7368] // LDR <Dt>, [<Xn|SP>, #<pimmd>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQui | ldr q14, [x6, #4624] // LDR <Qt>, [<Xn|SP>, #<pimmq>] \\ Load vector reg, unsigned immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWl | ldr w15, test // LDR <Wt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXl | ldr x26, test // LDR <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSl | ldr s17, test // LDR <St>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDl | ldr d10, test // LDR <Dt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQl | ldr q22, test // LDR <Qt>, <label> \\ Load vector reg, literal, S/D/Q forms \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitM | LDR_PXI | ldr p0, [x28] // LDR <Pt>, [<Xn|SP>] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitM | LDR_PXI | ldr p1, [x6, #-53, mul vl] // LDR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load predicate \\ 2 6 6 2.0 V1UnitL,V1UnitM
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w30, [x10, x0] // LDR <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x13, [x4, x21] // LDR <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w25, [x18, w26, uxtw] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x20, [x29, w26, uxtw] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w26, [x12, w0, uxtw #2] // LDR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x13, [x2, w10, uxtw #3] // LDR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w13, [x18, w19, sxtw] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x5, [x26, w12, sxtw] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroW | ldr w16, [x9, w24, sxtw #2] // LDR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroW | ldr x21, [x29, w4, sxtw #3] // LDR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w19, [x15, x1, sxtx] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x25, [x4, x20, sxtx] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w3, [x1, x17, sxtx #2] // LDR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x2, [x13, x26, sxtx #3] // LDR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRWroX | ldr w1, [x18, x17, lsl #2] // LDR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRXroX | ldr x22, [x17, x3, lsl #3] // LDR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroX | ldr b8, [x30, x10] // LDR <Bt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroW | ldr b25, [x21, w8, uxtw] // LDR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroW | ldr b7, [x9, w29, sxtw] // LDR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRBroX | ldr b31, [x17, x6, sxtx] // LDR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRHroX | ldr h11, [x13, x9] // LDR <Ht>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRHroW | ldr h6, [x4, w4, uxtw] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRHroW | ldr h28, [x3, w28, sxtw] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRHroX | ldr h3, [x15, x19, sxtx] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroW | ldr h24, [x27, w5, uxtw #1] // LDR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroW | ldr h22, [x28, w11, sxtw #1] // LDR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroX | ldr h3, [x18, x26, sxtx #1] // LDR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRHroX | ldr h8, [x23, x19, lsl #1] // LDR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s21, [x1, x29] // LDR <St>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s12, [x30, w5, uxtw] // LDR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s15, [x2, w20, sxtw] // LDR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s11, [x25, x20, sxtx] // LDR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s9, [x24, w27, uxtw #2] // LDR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroW | ldr s7, [x2, w5, sxtw #2] // LDR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s13, [x19, x28, sxtx #2] // LDR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRSroX | ldr s21, [x10, x4, lsl #2] // LDR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d10, [x23, x10] // LDR <Dt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d24, [x26, w7, uxtw] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d28, [x12, w2, sxtw] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d0, [x7, x29, sxtx] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d24, [x9, w27, uxtw #3] // LDR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroW | ldr d5, [x17, w2, sxtw #3] // LDR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d2, [x5, x16, sxtx #3] // LDR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load vector reg, register offset, extend, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRDroX | ldr d2, [x29, x18, lsl #3] // LDR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Load vector reg, register offset, scale, S/D-form \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQroX | ldr q9, [x13, x16] // LDR <Qt>, [<Xn|SP>, <Xm>] \\ Load vector reg, register offset, basic \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQroW | ldr q16, [x16, w1, uxtw] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQroW | ldr q1, [x17, w5, sxtw] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDRQroX | ldr q1, [x8, x9, sxtx] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Load vector reg, register offset, extend \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroW | ldr q23, [x26, w23, uxtw #4] // LDR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroW | ldr q3, [x18, w23, sxtw #4] // LDR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroX | ldr q2, [x28, x30, sxtx #4] // LDR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Load vector reg, register offset, extend, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 7 | 7 | 3.00 | V1UnitI, V1UnitL | LDRQroX | ldr q21, [x23, x27, lsl #4] // LDR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Load vector reg, register offset, scale, H/Q-form \\ 2 7 7 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDR_ZXI | ldr z26, [x4] // LDR <Zt>, [<Xn|SP>] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDR_ZXI | ldr z18, [x27, #16, mul vl] // LDR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Load vector \\ 1 6 6 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRBBpost | ldrb w4, [x17], #0 // LDRB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRBBpre | ldrb w27, [x23, #114]! // LDRB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBui | ldrb w26, [x19] // LDRB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBui | ldrb w29, [x18, #3179] // LDRB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroX | ldrb w16, [x25, x9] // LDRB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroW | ldrb w9, [x15, w19, uxtw] // LDRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroW | ldrb w25, [x7, w0, sxtw] // LDRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRBBroX | ldrb w0, [x18, x21, sxtx] // LDRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRHHpost | ldrh w9, [x1], #-2 // LDRH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRHHpre | ldrh w12, [x29, #-41]! // LDRH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHui | ldrh w28, [x3] // LDRH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHui | ldrh w27, [x19, #3156] // LDRH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroX | ldrh w20, [x25, x15] // LDRH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroW | ldrh w22, [x0, w24, uxtw] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroW | ldrh w6, [x17, w18, sxtw] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRHHroX | ldrh w21, [x13, x30, sxtx] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRHHroW | ldrh w14, [x21, w21, uxtw #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRHHroW | ldrh w0, [x29, w13, sxtw #1] // LDRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRHHroX | ldrh w11, [x20, x0, sxtx #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRHHroX | ldrh w12, [x17, x27, lsl #1] // LDRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBWpost | ldrsb w12, [x13], #-250 // LDRSB <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBXpost | ldrsb x10, [x2], #-229 // LDRSB <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBWpre | ldrsb w5, [x2, #-169]! // LDRSB <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSBXpre | ldrsb x28, [x12, #-46]! // LDRSB <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWui | ldrsb w5, [x26] // LDRSB <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWui | ldrsb w24, [x0, #3862] // LDRSB <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXui | ldrsb x6, [x0] // LDRSB <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXui | ldrsb x20, [x0, #653] // LDRSB <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroX | ldrsb w30, [x22, x21] // LDRSB <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroW | ldrsb w24, [x2, w14, uxtw] // LDRSB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroW | ldrsb w7, [x1, w8, sxtw] // LDRSB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBWroX | ldrsb w4, [x8, x25, sxtx] // LDRSB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroX | ldrsb x12, [x28, x27] // LDRSB <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroW | ldrsb x10, [x5, w9, uxtw] // LDRSB <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroW | ldrsb x19, [x23, w24, sxtw] // LDRSB <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSBXroX | ldrsb x20, [x10, x13, sxtx] // LDRSB <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHWpost | ldrsh w5, [x0], #-115 // LDRSH <Wt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHXpost | ldrsh x30, [x18], #-50 // LDRSH <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHWpre | ldrsh w27, [x15, #-45]! // LDRSH <Wt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSHXpre | ldrsh x14, [x24, #27]! // LDRSH <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWui | ldrsh w18, [x13] // LDRSH <Wt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWui | ldrsh w11, [x27, #4094] // LDRSH <Wt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXui | ldrsh x19, [x26] // LDRSH <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXui | ldrsh x19, [x9, #6652] // LDRSH <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroX | ldrsh w18, [x30, x24] // LDRSH <Wt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroW | ldrsh w13, [x25, w7, uxtw] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroW | ldrsh w3, [x16, w28, sxtw] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHWroX | ldrsh w0, [x13, x14, sxtx] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHWroW | ldrsh w0, [x5, w21, uxtw #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHWroW | ldrsh w26, [x6, w29, sxtw #1] // LDRSH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHWroX | ldrsh w22, [x26, x15, sxtx #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHWroX | ldrsh w26, [x20, x21, lsl #1] // LDRSH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroX | ldrsh x4, [x9, x24] // LDRSH <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroW | ldrsh x25, [x8, w13, uxtw] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroW | ldrsh x25, [x20, w10, sxtw] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSHXroX | ldrsh x6, [x13, x10, sxtx] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHXroW | ldrsh x15, [x0, w28, uxtw #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHXroW | ldrsh x19, [x9, w15, sxtw #1] // LDRSH <Xt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHXroX | ldrsh x1, [x17, x26, sxtx #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Load register, register offset, extend, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 5 | 5 | 3.00 | V1UnitI, V1UnitL | LDRSHXroX | ldrsh x7, [x29, x17, lsl #1] // LDRSH <Xt>, [<Xn|SP>, <Xm>, LSL #1] \\ Load register, register offset, scale by 2 \\ 2 5 5 3.0 V1UnitI,V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSWpost | ldrsw x4, [x21], #-93 // LDRSW <Xt>, [<Xn|SP>], #<simm> \\ Load register, immed post-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 3.00 | V1UnitI, V1UnitL | LDRSWpre | ldrsw x6, [x28, #96]! // LDRSW <Xt>, [<Xn|SP>, #<simm>]! \\ Load register, immed pre-index \\ 2 4 4 3.0 V1UnitL,V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWui | ldrsw x1, [x23] // LDRSW <Xt>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWui | ldrsw x6, [x19, #4552] // LDRSW <Xt>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWl | ldrsw x20, test // LDRSW <Xt>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x21, [x25, x7] // LDRSW <Xt>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x12, [x28, w12, uxtw] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x22, [x26, w21, sxtw] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x0, [x21, x19, sxtx] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x23, [x17, w19, uxtw #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroW | ldrsw x23, [x30, w11, sxtw #2] // LDRSW <Xt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x29, [x12, x5, sxtx #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDRSWroX | ldrsw x3, [x1, x17, lsl #2] // LDRSW <Xt>, [<Xn|SP>, <Xm>, LSL #2] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRWi | ldtr w12, [x9] // LDTR <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRWi | ldtr w9, [x3, #-55] // LDTR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRXi | ldtr x9, [x9] // LDTR <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRXi | ldtr x25, [x1, #103] // LDTR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRBi | ldtrb w27, [x7] // LDTRB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRBi | ldtrb w8, [x1, #-90] // LDTRB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRHi | ldtrh w13, [x21] // LDTRH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRHi | ldtrh w10, [x15, #-67] // LDTRH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBWi | ldtrsb w15, [x19] // LDTRSB <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBWi | ldtrsb w28, [x19, #-202] // LDTRSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBXi | ldtrsb x17, [x6] // LDTRSB <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSBXi | ldtrsb x0, [x11, #180] // LDTRSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHWi | ldtrsh w19, [x26] // LDTRSH <Wt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHWi | ldtrsh w16, [x28, #-233] // LDTRSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHXi | ldtrsh x26, [x22] // LDTRSH <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSHXi | ldtrsh x27, [x19, #-76] // LDTRSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSWi | ldtrsw x23, [x28] // LDTRSW <Xt>, [<Xn|SP>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDTRSWi | ldtrsw x26, [x21, #45] // LDTRSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, immed unprivileged \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURBi | ldur b24, [x3] // LDUR <Bt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURBi | ldur b9, [x25, #240] // LDUR <Bt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURHi | ldur h29, [x21] // LDUR <Ht>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURHi | ldur h5, [x23, #-5] // LDUR <Ht>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURSi | ldur s12, [x14] // LDUR <St>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURSi | ldur s22, [x10, #108] // LDUR <St>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURDi | ldur d16, [x14] // LDUR <Dt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURDi | ldur d22, [x24, #-198] // LDUR <Dt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURQi | ldur q25, [x9] // LDUR <Qt>, [<Xn|SP>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LDURQi | ldur q5, [x24, #233] // LDUR <Qt>, [<Xn|SP>, #<simm>] \\ Load vector reg, unscaled immed \\ 1 6 6 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURWi | ldur w19, [x30] // LDUR <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURWi | ldur w24, [x12, #202] // LDUR <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURXi | ldur x0, [x3] // LDUR <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURXi | ldur x14, [x14, #17] // LDUR <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURBBi | ldurb w9, [x24] // LDURB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURBBi | ldurb w12, [x5, #92] // LDURB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURHHi | ldurh w27, [x14] // LDURH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURHHi | ldurh w13, [x30, #-173] // LDURH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBWi | ldursb w5, [x8] // LDURSB <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBWi | ldursb w21, [x10, #172] // LDURSB <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBXi | ldursb x19, [x15] // LDURSB <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSBXi | ldursb x16, [x11, #-173] // LDURSB <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHWi | ldursh w21, [x12] // LDURSH <Wt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHWi | ldursh w16, [x18, #203] // LDURSH <Wt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHXi | ldursh x4, [x28] // LDURSH <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSHXi | ldursh x5, [x3, #-133] // LDURSH <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSWi | ldursw x21, [x7] // LDURSW <Xt>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDURSWi | ldursw x11, [x16, #169] // LDURSW <Xt>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPW | ldxp w23, w14, [x17] // LDXP <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPW | ldxp w2, w8, [x21] // LDXP <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPX | ldxp x5, x6, [x30] // LDXP <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 4 | 4 | 1.50 | V1UnitL[2] | LDXPX | ldxp x10, x26, [x6] // LDXP <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRW | ldxr w4, [x9] // LDXR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRW | ldxr w7, [x3] // LDXR <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRX | ldxr x6, [x27] // LDXR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRX | ldxr x3, [x4] // LDXR <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRB | ldxrb w17, [x21] // LDXRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRB | ldxrb w14, [x3] // LDXRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRH | ldxrh w14, [x1] // LDXRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRH | ldxrh w24, [x11] // LDXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsl w25, w0, #22 // LSL <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsl x27, x7, #56 // LSL <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_B | lsl z1.b, p1/m, z1.b, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_H | lsl z7.h, p3/m, z7.h, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_S | lsl z10.s, p3/m, z10.s, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_D | lsl z21.d, p7/m, z21.d, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_B | lsl z13.b, z4.b, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_H | lsl z11.h, z16.h, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_S | lsl z16.s, z11.s, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_D | lsl z18.d, z4.d, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVWr | lsl w4, w9, w12 // LSL <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVXr | lsl x7, x29, x22 // LSL <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmZ_D | lsl z3.d, p2/m, z3.d, z15.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_WIDE_ZPmZ_S | lsl z3.s, p6/m, z3.s, z8.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_WIDE_ZZZ_S | lsl z19.s, z25.s, z25.d // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSLR_ZPmZ_H | lslr z3.h, p5/m, z3.h, z23.h // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVWr | lsl w6, w8, w2 // LSLV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVXr | lsl x7, x26, x21 // LSLV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsr w0, w0, #30 // LSR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsr x23, x24, #23 // LSR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_B | lsr z21.b, p5/m, z21.b, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_H | lsr z1.h, p4/m, z1.h, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_S | lsr z24.s, p7/m, z24.s, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_D | lsr z13.d, p3/m, z13.d, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_B | lsr z3.b, z11.b, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_H | lsr z5.h, z12.h, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_S | lsr z21.s, z16.s, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_D | lsr z21.d, z15.d, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVWr | lsr w17, w20, w15 // LSR <Wd>, <Wn>, <Wm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVXr | lsr x24, x4, x20 // LSR <Xd>, <Xn>, <Xm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmZ_D | lsr z30.d, p3/m, z30.d, z28.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_WIDE_ZPmZ_H | lsr z18.h, p3/m, z18.h, z29.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_WIDE_ZZZ_H | lsr z7.h, z30.h, z11.d // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSRR_ZPmZ_B | lsrr z14.b, p1/m, z14.b, z16.b // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVWr | lsr w0, w28, w19 // LSRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVXr | lsr x16, x22, x19 // LSRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_B | mad z17.b, p7/m, z4.b, z5.b // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_H | mad z29.h, p4/m, z31.h, z18.h // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_S | mad z7.s, p4/m, z5.s, z29.s // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MAD_ZPmZZ_D | mad z28.d, p7/m, z18.d, z2.d // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MADDWrrr | madd w15, w9, w9, w29 // MADD <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MADDXrrr | madd x29, x22, x21, x21 // MADD <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv8i16_indexed | mla v15.8h, v22.8h, v4.h[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv2i32_indexed | mla v28.2s, v10.2s, v2.s[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv4i32 | mla v31.4s, v18.4s, v27.4s // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_B | mla z1.b, p0/m, z3.b, z3.b // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_H | mla z21.h, p2/m, z31.h, z30.h // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_S | mla z24.s, p3/m, z11.s, z9.s // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MLA_ZPmZZ_D | mla z2.d, p0/m, z12.d, z5.d // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv8i16_indexed | mls v25.8h, v29.8h, v0.h[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv2i32_indexed | mls v22.2s, v29.2s, v0.s[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv4i32 | mls v26.4s, v5.4s, v28.4s // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_B | mls z11.b, p1/m, z28.b, z6.b // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_H | mls z31.h, p0/m, z25.h, z24.h // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_S | mls z1.s, p5/m, z7.s, z13.s // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MLS_ZPmZZ_D | mls z2.d, p1/m, z17.d, z10.d // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MSUBWrrr | mneg w14, w30, w30 // MNEG <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MSUBXrrr | mneg x21, x3, x9 // MNEG <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmV_S | mov z9.s, p2/m, s10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_B | mov z17.b, z29.b[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_H | mov z26.h, z7.h[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z14.s, z21.s[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_D | mov z22.d, z14.d[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z21.s, s25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWri | orr wsp, wzr, #0xe00 // MOV <Wd|WSP>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x3, #7680 // MOV <Xd|SP>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi8lane | mov v30.b[12], v17.b[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi16lane | mov v10.h[3], v17.h[5] // MOV <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi32lane | mov v19.s[2], v2.s[1] // MOV <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi64lane | mov v21.d[1], v16.d[0] // MOV <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi8gpr | mov v5.b[12], w23 // MOV <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi16gpr | mov v27.h[6], w6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi32gpr | mov v21.s[0], w21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi64gpr | mov v13.d[0], x10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_B | mov z30.b, p7/m, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_D | mov z30.d, p7/m, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_H | mov z10.h, p5/m, #72 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_B | mov z19.b, p6/z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z6.d, p1/z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z12.d, p7/z, #10240 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z30.b, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z2.h, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z20.h, #20992 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w24, #3584 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x15, #3584 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SEL_PPPP | mov p0.b, p0/m, p6.b // MOV <Pd>.B, <Pg>/M, <Pn>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | AND_PPzPP | mov p3.b, p7/z, p2.b // MOV <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | mov w21, w11 // MOV <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | mov x14, x0 // MOV <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi8 | mov b15, v21.b[8] // MOV B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi16 | mov h13, v17.h[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi32 | mov s7, v11.s[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi64 | mov d27, v24.d[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_D | mov z12.d, p5/m, x24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_D | mov z31.d, p6/m, sp // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_B | mov z19.b, w27 // MOV <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_H | mov z17.h, wsp // MOV <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi32 | mov w13, v12.s[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi64_idx0 | mov x30, v18.d[0] // MOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | mov wsp, wsp // MOV <Wd|WSP>, <Wn|WSP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | mov x1, x11 // MOV <Xd|SP>, <Xn|SP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv16i8 | mov v12.16b, v6.16b // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SEL_ZPZZ_D | mov z1.d, p3/m, z6.d // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZZZ | mov z24.d, z25.d // MOV <Zd>.D, <Zn>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w30, #3584 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x4, #3584 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z14.b, #112 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z8.h, #96 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_S | mov z2.s, #2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_D | mov z6.d, #4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORR_PPzPP | mov p2.b, p5.b // MOV <Pd>.B, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv16b_ns | movi v7.16b, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv8i16 | movi v14.8h, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv4i16 | movi v13.4h, #74, lsl #8 // MOVI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2i32 | movi v19.2s, #226 // MOVI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2i32 | movi v0.2s, #137, lsl #24 // MOVI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv4s_msl | movi v1.4s, #122, msl #8 // MOVI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVID | movi d16, #0000000000000000 // MOVI <Dd>, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2d_ns | movi v13.2d, #0xff00ff00ff00ff00 // MOVI <Vd>.2D, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKWi | movk w8, #57951 // MOVK <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKWi | movk w6, #34540 // MOVK <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKXi | movk x1, #56641 // MOVK <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKXi | movk x23, #3111, lsl #48 // MOVK <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNWi | mov w16, #-52527 // MOVN <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNWi | mov w27, #-47743 // MOVN <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNXi | mov x10, #-63432 // MOVN <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNXi | mov x0, #2116973299840843775 // MOVN <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | MOVPRFX_ZPmZ_B | movprfx z22.b, p0/m, z4.b // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_B | mla z22.b, p0/m, z19.b, z25.b // Ignore
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | MOVPRFX_ZZ | movprfx z3, z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZPmZZ_D | fmla z3.d, p0/m, z8.d, z19.d // Ignore
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ANDS_PPzPP | movs p0.b, p7/z, p3.b // MOVS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORRS_PPzPP | movs p4.b, p0.b // MOVS <Pd>.B, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w3, #9629 // MOVZ <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w23, #710082560 // MOVZ <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x0, #22630 // MOVZ <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x11, #5760103923406864384 // MOVZ <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MRS | mrs x4, ACTLR_EL1 // MRS <Xt>, <systemreg> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MRS | mrs x14, S2_4_C0_C5_4 // MRS <Xt>, S<op0>_<op1>_<Cn>_<Cm>_<op2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_B | msb z18.b, p1/m, z27.b, z0.b // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_H | msb z27.h, p5/m, z23.h, z1.h // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_S | msb z26.s, p2/m, z0.s, z2.s // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MSB_ZPmZZ_D | msb z1.d, p6/m, z12.d, z12.d // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSRpstateImm4 | msr DAIFSet, #0 // MSR <pstatefield1>, #<imm1> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSRpstateImm4 | msr SPSel, #0 // MSR <pstatefield2>, #<imm2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSR | msr ACTLR_EL3, x18 // MSR <systemreg>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSR | msr S3_6_C8_C12_1, x23 // MSR S<op0>_<op1>_<Cn>_<Cm>_<op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MSUBWrrr | msub w6, w26, w13, w13 // MSUB <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MSUBXrrr | msub x14, x28, x9, x3 // MSUB <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv4i16_indexed | mul v26.4h, v20.4h, v14.h[5] // MUL <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv8i16_indexed | mul v5.8h, v21.8h, v3.h[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv2i32_indexed | mul v29.2s, v10.2s, v3.s[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv4i32_indexed | mul v30.4s, v11.4s, v4.s[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_B | mul z16.b, z16.b, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_H | mul z9.h, z9.h, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_S | mul z23.s, z23.s, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MUL_ZI_D | mul z15.d, z15.d, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv8i16 | mul v3.8h, v9.8h, v8.8h // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_B | mul z17.b, p6/m, z17.b, z9.b // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_H | mul z18.h, p7/m, z18.h, z15.h // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_S | mul z29.s, p6/m, z29.s, z8.s // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MUL_ZPmZ_D | mul z25.d, p1/m, z25.d, z25.d // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MADDWrrr | mul w8, w13, w20 // MUL <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MADDXrrr | mul x12, x8, x25 // MUL <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | mvn w0, w18 // MVN <Wd>, <Wm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | mvn w25, w27, asr #6 // MVN <Wd>, <Wm>, <shift> #<wamount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | mvn x1, x21 // MVN <Xd>, <Xm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | mvn x9, x23, asr #39 // MVN <Xd>, <Xm>, <shift> #<amount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NOTv16i8 | mvn v16.16b, v24.16b // MVN <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4i16 | mvni v9.4h, #237 // MVNI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv8i16 | mvni v8.8h, #171, lsl #8 // MVNI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv2i32 | mvni v7.2s, #81 // MVNI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4i32 | mvni v22.4s, #15, lsl #8 // MVNI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4s_msl | mvni v12.4s, #141, msl #8 // MVNI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | NAND_PPzPP | nand p5.b, p4/z, p5.b, p5.b // NAND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | NANDS_PPzPP | nands p6.b, p3/z, p4.b, p5.b // NANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrs | neg w25, w20, lsl #4 // NEG <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | neg w0, w29, lsl #9 // NEG <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | neg w7, w28, asr #24 // NEG <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | neg x29, x11, lsl #3 // NEG <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | neg x24, x10, lsl #54 // NEG <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | neg x0, x16, lsr #2 // NEG <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NEGv1i64 | neg d18, d20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NEGv2i64 | neg v16.2d, v14.2d // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | NEG_ZPmZ_B | neg z16.b, p2/m, z15.b // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | negs w30, w22, lsl #2 // NEGS <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | negs w8, w8, lsl #15 // NEGS <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | negs w12, w21, asr #15 // NEGS <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | negs x24, x23, lsl #1 // NEGS <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | negs x20, x13, lsl #20 // NEGS <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | negs x1, x22, lsr #30 // NEGS <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCWr | ngc w11, w9 // NGC <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCXr | ngc x30, x4 // NGC <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSWr | ngcs w13, w22 // NGCS <Wd>, <Wm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSXr | ngcs x15, x1 // NGCS <Xd>, <Xm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | nop // NOP \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | NOR_PPzPP | nor p4.b, p4/z, p0.b, p4.b // NOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | NORS_PPzPP | nors p1.b, p0/z, p7.b, p6.b // NORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | EOR_PPzPP | not p7.b, p2/z, p6.b // NOT <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | NOT_ZPmZ_S | not z29.s, p4/m, z9.s // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NOTv8i8 | mvn v15.8b, v29.8b // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | EORS_PPzPP | nots p7.b, p3/z, p1.b // NOTS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z5.b, z5.b, #0x8f // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z14.h, z14.h, #0xff9f // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z14.s, z14.s, #0xfffffffd // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z27.d, z27.d, #0xfffffffffffffffb // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORN_PPzPP | orn p1.b, p2/z, p3.b, p5.b // ORN <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | orn w2, w27, w7 // ORN <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | orn w6, w28, w14, lsl #19 // ORN <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | orn x22, x12, x3 // ORN <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | orn x19, x17, x0, lsl #58 // ORN <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORNv8i8 | orn v29.8b, v19.8b, v16.8b // ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORNS_PPzPP | orns p3.b, p3/z, p0.b, p3.b // ORNS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWri | orr wsp, w27, #0xe00 // ORR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXri | orr x27, x6, #0x1e00 // ORR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z4.b, z4.b, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z26.h, z26.h, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z3.s, z3.s, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z30.d, z30.d, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORR_PPzPP | orr p6.b, p4/z, p4.b, p3.b // ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | orr w14, w1, w23 // ORR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | orr w25, w22, w0, asr #20 // ORR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | orr x11, x6, x13 // ORR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | orr x26, x26, x7, lsl #62 // ORR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i16 | orr v9.4h, #18 // ORR <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv8i16 | orr v20.8h, #175 // ORR <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i32 | orr v4.4s, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i32 | orr v17.4s, #119, lsl #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv16i8 | orr v12.16b, v9.16b, v1.16b // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZPmZ_H | orr z28.h, p3/m, z28.h, z7.h // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZZZ | orr z8.d, z14.d, z19.d // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORRS_PPzPP | orrs p7.b, p7/z, p6.b, p5.b // ORRS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | ORV_VPZ_D | orv d19, p6, z31.d // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PFALSE | pfalse p6.b // PFALSE <Pd>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PFIRST_B | pfirst p0.b, p5, p0.b // PFIRST <Pdn>.B, <Pg>, <Pdn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | PMULv8i8 | pmul v30.8b, v0.8b, v27.8b // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | PMULv16i8 | pmul v7.16b, v20.16b, v18.16b // PMUL <Vd>.16B, <Vn>.16B, <Vm>.16B \\ ASIMD multiply/multiply long (8x8) polynomial, Q-form \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PNEXT_S | pnext p5.s, p5, p5.s // PNEXT <Pdn>.<T>, <Pv>, <Pdn>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb #14, p5, [x21] // PRFB #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb #14, p3, [x28, #-24, mul vl] // PRFB #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb pstl1strm, p7, [x5] // PRFB <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb pldl2keep, p1, [x12, #11, mul vl] // PRFB <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRR | prfb pldl1keep, p7, [x4, x9] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Xm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_UXTW_SCALED | prfb pldl3strm, p4, [x3, z15.s, uxtw] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_UXTW_SCALED | prfb pldl1strm, p7, [x28, z4.d, uxtw] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_SCALED | prfb pstl3keep, p2, [x18, z19.d] // PRFB <prfop>, <Pg>, [<Xn|SP>, <Zm>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl3keep, p1, [z28.s] // PRFB #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl1keep, p0, [z22.s, #21] // PRFB #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl1strm, p2, [z25.s] // PRFB <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_S_PZI | prfb pstl2strm, p1, [z31.s, #18] // PRFB <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pstl2strm, p5, [z25.d] // PRFB #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pldl2keep, p2, [z4.d, #10] // PRFB #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pstl2keep, p5, [z5.d] // PRFB <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_D_PZI | prfb pldl1keep, p1, [z31.d, #17] // PRFB <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pstl3strm, p3, [x21] // PRFD #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pldl3keep, p5, [x3, #-7, mul vl] // PRFD #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pstl3keep, p0, [x29] // PRFD <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRI | prfd pldl1strm, p3, [x15, #-16, mul vl] // PRFD <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_PRR | prfd pstl2keep, p3, [x24, x24, lsl #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_SXTW_SCALED | prfd pstl1strm, p3, [x27, z27.s, sxtw #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_UXTW_SCALED | prfd pstl1keep, p0, [x21, z2.d, uxtw #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_SCALED | prfd pldl1strm, p7, [x22, z22.d, lsl #3] // PRFD <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pldl2strm, p1, [z2.s] // PRFD #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pstl1keep, p7, [z10.s, #72] // PRFD #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pstl1keep, p3, [z19.s] // PRFD <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_S_PZI | prfd pstl2strm, p4, [z26.s, #248] // PRFD <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd #15, p1, [z17.d] // PRFD #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd pldl2strm, p0, [z6.d, #24] // PRFD #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd pstl1keep, p3, [z31.d] // PRFD <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFD_D_PZI | prfd pstl1strm, p7, [z10.d, #40] // PRFD <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh pldl2strm, p3, [x17] // PRFH #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh #6, p3, [x6, #19, mul vl] // PRFH #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh pldl3keep, p6, [x2] // PRFH <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRI | prfh pldl2keep, p6, [x18, #-4, mul vl] // PRFH <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_PRR | prfh pstl2keep, p1, [x28, x9, lsl #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_S_UXTW_SCALED | prfh pldl1strm, p6, [x0, z10.s, uxtw #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_UXTW_SCALED | prfh pldl3keep, p7, [x24, z21.d, uxtw #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_SCALED | prfh pstl1strm, p5, [x10, z6.d, lsl #1] // PRFH <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_S_PZI | prfh pldl3strm, p6, [z0.s] // PRFH <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_S_PZI | prfh pstl3strm, p0, [z30.s, #12] // PRFH <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_PZI | prfh pstl2keep, p2, [z21.d] // PRFH <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFH_D_PZI | prfh pstl2keep, p1, [z8.d, #14] // PRFH <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm pldl1strm, [x5] // PRFM <prfop>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm pstl3keep, [x19, #10160] // PRFM <prfop>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm #25, [x28] // PRFM #<imm5>, [<Xn|SP>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMui | prfm #7, [x15, #6776] // PRFM #<imm5>, [<Xn|SP>, #<pimm>] \\ Load register, unsigned immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMl | prfm pldl3strm, test // PRFM <prfop>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMl | prfm pldl1keep, test // PRFM #<imm5>, <label> \\ Load register, literal \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pldl1keep, [x25, x16] // PRFM <prfop>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | rprfm #16, x18, [x1] // PRFM #<imm5>, [<Xn|SP>, <Xm>] \\ Load register, register offset, basic \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl1keep, [x14, w8, uxtw] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm plil3keep, [x8, w5, uxtw] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl2keep, [x16, w16, sxtw] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm plil2strm, [x25, w11, sxtw] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl2strm, [x3, x24, sxtx] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | rprfm #49, x2, [x5] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX] \\ Load register, register offset, extend \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl2keep, [x10, w29, uxtw #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pstl1strm, [x9, w27, uxtw #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, UXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl1keep, [x24, w0, sxtw #3] // PRFM <prfop>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroW | prfm pldl3keep, [x30, w25, sxtw #3] // PRFM #<imm5>, [<Xn|SP>, <Wm>, SXTW #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl1strm, [x18, x20, sxtx #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl2strm, [x29, x25, sxtx #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, SXTX #3] \\ Load register, register offset, extend, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm pstl3keep, [x2, x5, lsl #3] // PRFM <prfop>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFMroX | prfm plil1keep, [x22, x3, lsl #3] // PRFM #<imm5>, [<Xn|SP>, <Xm>, LSL #3] \\ Load register, register offset, scale by 4/8 \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum pstl1keep, [x7] // PRFUM <prfop>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum pldl2keep, [x7, #-37] // PRFUM <prfop>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum pstl3keep, [x21] // PRFUM #<imm5>, [<Xn|SP>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | PRFUMi | prfum #23, [x6, #-131] // PRFUM #<imm5>, [<Xn|SP>, #<simm>] \\ Load register, unscaled immed \\ 1 4 4 3.0 V1UnitL
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw pldl2strm, p2, [x4] // PRFW #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw #6, p4, [x7, #6, mul vl] // PRFW #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw pldl3keep, p3, [x2] // PRFW <prfop>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRI | prfw pstl1keep, p7, [x2, #-31, mul vl] // PRFW <prfop>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_PRR | prfw pstl1keep, p4, [x18, x21, lsl #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_UXTW_SCALED | prfw pldl2strm, p0, [x15, z6.s, uxtw #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_SXTW_SCALED | prfw pstl2keep, p0, [x27, z18.d, sxtw #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_SCALED | prfw pstl2keep, p3, [x19, z8.d, lsl #2] // PRFW <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw #7, p7, [z27.s] // PRFW #<imm4>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw pstl1strm, p5, [z16.s, #72] // PRFW #<imm4>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw pldl3keep, p4, [z2.s] // PRFW <prfop>, <Pg>, [<Zn>.S] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_S_PZI | prfw pstl3keep, p2, [z0.s, #40] // PRFW <prfop>, <Pg>, [<Zn>.S, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw #7, p1, [z20.d] // PRFW #<imm4>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw #7, p2, [z10.d, #108] // PRFW #<imm4>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw pstl1keep, p6, [z12.d] // PRFW <prfop>, <Pg>, [<Zn>.D] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFW_D_PZI | prfw pstl2strm, p0, [z18.d, #60] // PRFW <prfop>, <Pg>, [<Zn>.D, #<imm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | psb csync // PSB CSYNC \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DSB | pssbb // PSSBB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTEST_PP | ptest p0, p5.b // PTEST <Pg>, <Pn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_B | ptrue p2.b // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_D | ptrue p3.d, pow2 // PTRUE <Pd>.<T>, <pattern> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_H | ptrue p0.h // PTRUE <Pd>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PTRUE_S | ptrue p4.s, #21 // PTRUE <Pd>.<T>, #<uimm5> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_H | ptrues p3.h // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_D | ptrues p3.d, vl32 // PTRUES <Pd>.<T>, <pattern> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_B | ptrues p0.b // PTRUES <Pd>.<T> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_D | ptrues p2.d, vl128 // PTRUES <Pd>.<T>, #<uimm5> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PUNPKHI_PP | punpkhi p4.h, p4.b // PUNPKHI <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PUNPKLO_PP | punpklo p1.h, p4.b // PUNPKLO <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RADDHNv2i64_v2i32 | raddhn v17.2s, v22.2d, v5.2d // RADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RADDHNv2i64_v4i32 | raddhn2 v21.4s, v11.2d, v1.2d // RADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RBITv16i8 | rbit v16.16b, v21.16b // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RBITWr | rbit w27, w10 // RBIT <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RBITXr | rbit x30, x0 // RBIT <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | RBIT_ZPmZ_S | rbit z23.s, p3/m, z10.s // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | RDFFR_PPz | rdffr p2.b, p1/z // RDFFR <Pd>.B, <Pg>/Z \\ Read first fault register, predicated \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | RDFFR_P | rdffr p5.b // RDFFR <Pd>.B \\ Read first fault register, unpredicated \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 0.33 | V1UnitI[6], V1UnitM[6] | RDFFRS_PPz | rdffrs p7.b, p2/z // RDFFRS <Pd>.B, <Pg>/Z \\ Read first fault register and set flags \\ 1 4 4 0.33 V1UnitM[6]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | RDVLI_XI | rdvl x20, #-20 // RDVL <Xd>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | RET | ret // RET \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | RET | ret x14 // RET {<Xn>} \\ Branch, register \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | REV_PP_H | rev p1.h, p2.h // REV <Pd>.<T>, <Pn>.<T> \\ Predicate reverse \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REV_ZZ_D | rev z11.d, z24.d // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVWr | rev w19, w20 // REV <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVXr | rev x30, x15 // REV <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV16v16i8 | rev16 v5.16b, v26.16b // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV16Wr | rev16 w1, w25 // REV16 <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV16Xr | rev16 x27, x11 // REV16 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV32v8i16 | rev32 v22.8h, v4.8h // REV32 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV32Xr | rev32 x30, x6 // REV32 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVXr | rev x5, x2 // REV64 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV64v2i32 | rev64 v0.2s, v19.2s // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVB_ZPmZ_D | revb z3.d, p2/m, z21.d // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVH_ZPmZ_D | revh z1.d, p5/m, z19.d // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVW_ZPmZ_D | revw z16.d, p1/m, z3.d // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRWrri | ror w20, w13, #21 // ROR <Wd>, <Ws>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRXrri | ror x5, x8, #7 // ROR <Xd>, <Xs>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVWr | ror w29, w26, w0 // ROR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVXr | ror x4, x13, x3 // ROR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVWr | ror w26, w0, w28 // RORV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVXr | ror x21, x29, x17 // RORV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv8i8_shift | rshrn v24.8b, v0.8h, #4 // RSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv4i16_shift | rshrn v8.4h, v24.4s, #16 // RSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv2i32_shift | rshrn v12.2s, v12.2d, #28 // RSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv16i8_shift | rshrn2 v1.16b, v16.8h, #6 // RSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv8i16_shift | rshrn2 v1.8h, v28.4s, #3 // RSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv4i32_shift | rshrn2 v20.4s, v19.2d, #14 // RSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RSUBHNv8i16_v8i8 | rsubhn v3.8b, v9.8h, v16.8h // RSUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RSUBHNv2i64_v4i32 | rsubhn2 v31.4s, v12.2d, v15.2d // RSUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABAv16i8 | saba v8.16b, v27.16b, v13.16b // SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABALv2i32_v2i64 | sabal v2.2d, v5.2s, v31.2s // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABALv4i32_v2i64 | sabal2 v21.2d, v15.4s, v13.4s // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDv2i32 | sabd v12.2s, v11.2s, v27.2s // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SABD_ZPmZ_S | sabd z14.s, p1/m, z14.s, z23.s // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDLv2i32_v2i64 | sabdl v28.2d, v4.2s, v19.2s // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDLv16i8_v8i16 | sabdl2 v10.8h, v30.16b, v4.16b // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SADALPv8i8_v4i16 | sadalp v3.4h, v5.8b // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLv8i8_v8i16 | saddl v7.8h, v3.8b, v23.8b // SADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLv8i16_v4i32 | saddl2 v21.4s, v5.8h, v10.8h // SADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLPv16i8_v8i16 | saddlp v13.8h, v29.16b // SADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv8i8v | saddlv h18, v28.8b // SADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SADDLVv16i8v | saddlv h30, v4.16b // SADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv4i16v | saddlv s24, v29.4h // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv8i16v | saddlv s22, v23.8h // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv4i32v | saddlv d2, v27.4s // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SADDV_VPZ_B | saddv d19, p6, z1.b // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SADDV_VPZ_H | saddv d7, p2, z14.h // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SADDV_VPZ_S | saddv d4, p7, z27.s // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDWv4i16_v4i32 | saddw v8.4s, v0.4s, v1.4h // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDWv16i8_v8i16 | saddw2 v24.8h, v10.8h, v30.16b // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCWr | sbc w0, w16, w1 // SBC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCXr | sbc x19, x3, x9 // SBC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSWr | sbcs w26, w28, w0 // SBCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSXr | sbcs x8, x26, x29 // SBCS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sbfiz w14, w5, #21, #8 // SBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sbfiz x14, x1, #56, #2 // SBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sbfiz w24, w11, #5, #20 // SBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sbfx x14, x1, #36, #20 // SBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | asr w16, w16, #31 // SBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sbfx x14, x28, #53, #8 // SBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSWHri | scvtf h18, w17, #30 // SCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSWSri | scvtf s14, w9, #19 // SCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSWDri | scvtf d16, w3, #13 // SCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSXHri | scvtf h28, x25, #23 // SCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSXSri | scvtf s27, x19, #5 // SCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFSXDri | scvtf d15, x22, #32 // SCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUWHri | scvtf h22, w7 // SCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUWSri | scvtf s22, w10 // SCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUWDri | scvtf d23, w6 // SCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXHri | scvtf h21, x12 // SCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXSri | scvtf s25, x28 // SCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXDri | scvtf d12, x0 // SCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFh | scvtf h4, h8, #9 // SCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFs | scvtf s29, s12, #1 // SCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFd | scvtf d1, d12, #26 // SCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4i16_shift | scvtf v25.4h, v13.4h, #8 // SCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | SCVTFv8i16_shift | scvtf v4.8h, v8.8h, #10 // SCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2i32_shift | scvtf v5.2s, v2.2s, #26 // SCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4i32_shift | scvtf v2.4s, v24.4s, #10 // SCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2i64_shift | scvtf v11.2d, v2.2d, #42 // SCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i16 | scvtf h5, h14 // SCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i32 | scvtf s5, s16 // SCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i64 | scvtf d12, d11 // SCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4f16 | scvtf v22.4h, v10.4h // SCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | SCVTFv8f16 | scvtf v16.8h, v13.8h // SCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2f32 | scvtf v9.2s, v31.2s // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4f32 | scvtf v2.4s, v7.4s // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2f64 | scvtf v18.2d, v11.2d // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | SCVTF_ZPmZ_HtoH | scvtf z3.h, p3/m, z29.h // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoH | scvtf z1.h, p5/m, z27.s // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoS | scvtf z30.s, p4/m, z29.s // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoD | scvtf z18.d, p3/m, z16.s // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoH | scvtf z18.h, p1/m, z14.d // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoS | scvtf z10.s, p1/m, z11.d // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoD | scvtf z3.d, p2/m, z27.d // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 12 | 12 | 0.08 | V1UnitI[12], V1UnitM[12], V1UnitM0[12] | SDIVWr | sdiv w6, w28, w24 // SDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
+# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitI[20], V1UnitM[20], V1UnitM0[20] | SDIVXr | sdiv x19, x2, x14 // SDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
+# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | SDIV_ZPmZ_S | sdiv z24.s, p1/m, z24.s, z14.s // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | SDIV_ZPmZ_D | sdiv z7.d, p6/m, z7.d, z20.d // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | SDIVR_ZPmZ_S | sdivr z10.s, p2/m, z10.s, z7.s // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | SDIVR_ZPmZ_D | sdivr z0.d, p3/m, z0.d, z9.d // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | SDOT_ZZZI_S | sdot z6.s, z29.b, z0.b[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SDOT_ZZZI_D | sdot z0.d, z18.h, z10.h[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | SDOT_ZZZ_S | sdot z28.s, z30.b, z14.b // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SDOT_ZZZ_D | sdot z19.d, z5.h, z8.h // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SDOTlanev16i8 | sdot v2.4s, v27.16b, v5.4b[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SDOTv8i8 | sdot v3.2s, v20.8b, v10.8b // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SEL_PPPP | sel p1.b, p7, p5.b, p4.b // SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SEL_ZPZZ_H | sel z0.h, p7, z13.h, z13.h // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SETFFR | setffr // SETFFR \\ Set first fault register \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | sev // SEV \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | sevl // SEVL \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHADDv16i8 | shadd v25.16b, v1.16b, v10.16b // SHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLd | shl d17, d3, #16 // SHL <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv8i8_shift | shl v23.8b, v18.8b, #6 // SHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv8i16_shift | shl v0.8h, v23.8h, #10 // SHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv4i32_shift | shl v0.4s, v18.4s, #30 // SHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv2i64_shift | shl v20.2d, v28.2d, #40 // SHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv8i8 | shll v3.8h, v13.8b, #8 // SHLL <Vd>.8H, <Vn>.8B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv4i16 | shll v26.4s, v18.4h, #16 // SHLL <Vd>.4S, <Vn>.4H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv2i32 | shll v4.2d, v25.2s, #32 // SHLL <Vd>.2D, <Vn>.2S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv16i8 | shll2 v12.8h, v28.16b, #8 // SHLL2 <Vd>.8H, <Vn>.16B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv8i16 | shll2 v11.4s, v22.8h, #16 // SHLL2 <Vd>.4S, <Vn>.8H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv4i32 | shll2 v2.2d, v29.4s, #32 // SHLL2 <Vd>.2D, <Vn>.4S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv8i8_shift | shrn v27.8b, v23.8h, #3 // SHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv4i16_shift | shrn v17.4h, v1.4s, #13 // SHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv2i32_shift | shrn v13.2s, v0.2d, #12 // SHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv16i8_shift | shrn2 v4.16b, v29.8h, #8 // SHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv8i16_shift | shrn2 v9.8h, v18.4s, #10 // SHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv4i32_shift | shrn2 v5.4s, v12.2d, #16 // SHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHSUBv8i16 | shsub v15.8h, v5.8h, v27.8h // SHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLId | sli d7, d19, #53 // SLI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv16i8_shift | sli v16.16b, v26.16b, #7 // SLI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv4i16_shift | sli v14.4h, v10.4h, #15 // SLI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv2i32_shift | sli v29.2s, v14.2s, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv2i64_shift | sli v25.2d, v21.2d, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SMADDLrrr | smaddl x17, w27, w30, x3 // SMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMAX_ZI_S | smax z3.s, z3.s, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMAX_ZPmZ_B | smax z0.b, p5/m, z0.b, z20.b // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMAXv16i8 | smax v30.16b, v3.16b, v30.16b // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMAXPv8i16 | smaxp v21.8h, v16.8h, v7.8h // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv8i8v | smaxv b4, v30.8b // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SMAXVv16i8v | smaxv b15, v16.16b // SMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv4i16v | smaxv h28, v14.4h // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv8i16v | smaxv h6, v19.8h // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv4i32v | smaxv s3, v14.4s // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_B | smaxv b19, p4, z14.b // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_H | smaxv h0, p6, z20.h // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_S | smaxv s11, p2, z28.s // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_D | smaxv d24, p5, z24.d // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SMC | smc #0x7e57 // SMC #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMIN_ZI_S | smin z21.s, z21.s, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMIN_ZPmZ_S | smin z22.s, p0/m, z22.s, z30.s // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMINv4i32 | smin v29.4s, v24.4s, v24.4s // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMINPv8i16 | sminp v7.8h, v27.8h, v7.8h // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMINVv8i8v | sminv b6, v11.8b // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SMINVv16i8v | sminv b24, v8.16b // SMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMINVv4i16v | sminv h24, v23.4h // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMINVv8i16v | sminv h2, v9.8h // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMINVv4i32v | sminv s16, v15.4s // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_B | sminv b4, p2, z10.b // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_H | sminv h15, p7, z10.h // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_S | sminv s29, p0, z27.s // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_D | sminv d17, p2, z18.d // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv4i16_indexed | smlal v16.4s, v9.4h, v11.h[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv2i32_indexed | smlal v0.2d, v25.2s, v1.s[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i16_indexed | smlal2 v1.4s, v9.8h, v0.h[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv4i32_indexed | smlal2 v30.2d, v22.4s, v7.s[2] // SMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i8_v8i16 | smlal v25.8h, v24.8b, v28.8b // SMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i16_v4i32 | smlal2 v30.4s, v31.8h, v13.8h // SMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i16_indexed | smlsl v14.4s, v23.4h, v12.h[7] // SMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv2i32_indexed | smlsl v25.2d, v27.2s, v1.s[1] // SMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv8i16_indexed | smlsl2 v12.4s, v11.8h, v12.h[0] // SMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i32_indexed | smlsl2 v11.2d, v28.4s, v7.s[2] // SMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i16_v4i32 | smlsl v11.4s, v14.4h, v15.4h // SMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv8i16_v4i32 | smlsl2 v21.4s, v27.8h, v16.8h // SMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SMMLA | smmla v0.4s, v17.16b, v31.16b // SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SMSUBLrrr | smnegl x3, w23, w18 // SMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to32_idx0 | smov w15, v22.b[0] // SMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to32 | smov w6, v28.b[9] // SMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to32_idx0 | smov w26, v27.h[0] // SMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to32 | smov w18, v29.h[6] // SMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to64_idx0 | smov x21, v0.b[0] // SMOV <Xd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to64 | smov x16, v29.b[8] // SMOV <Xd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to64_idx0 | smov x9, v27.h[0] // SMOV <Xd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to64 | smov x4, v21.h[2] // SMOV <Xd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi32to64_idx0 | smov x15, v3.s[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi32to64 | smov x5, v29.s[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SMSUBLrrr | smsubl x8, w24, w13, x6 // SMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_B | smulh z11.b, p5/m, z11.b, z17.b // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_H | smulh z8.h, p0/m, z8.h, z4.h // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_S | smulh z27.s, p7/m, z27.s, z30.s // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SMULH_ZPmZ_D | smulh z4.d, p7/m, z4.d, z28.d // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | SMULHrr | smulh x8, x29, x17 // SMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SMADDLrrr | smull x19, w0, w6 // SMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i16_indexed | smull v3.4s, v26.4h, v1.h[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv2i32_indexed | smull v31.2d, v23.2s, v6.s[2] // SMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv8i16_indexed | smull2 v13.4s, v18.8h, v0.h[3] // SMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i32_indexed | smull2 v11.2d, v1.4s, v7.s[0] // SMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv2i32_v2i64 | smull v28.2d, v26.2s, v20.2s // SMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i32_v2i64 | smull2 v7.2d, v14.4s, v15.4s // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQABSv1i64 | sqabs d15, d26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQABSv8i16 | sqabs v25.8h, v24.8h // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_B | sqadd z1.b, z1.b, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_H | sqadd z18.h, z18.h, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_D | sqadd z3.d, z3.d, #158 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZZZ_D | sqadd z19.d, z27.d, z28.d // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQADDv1i16 | sqadd h12, h18, h10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQADDv2i32 | sqadd v15.2s, v13.2s, v28.2s // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x26, w26 // SQDECB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x16, w16, vl64 // SQDECB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x4, w4, vl1, mul #16 // SQDECB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiI | sqdecb x4 // SQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiI | sqdecb x28, vl6 // SQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiI | sqdecb x20, vl7, mul #4 // SQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiWdI | sqdecd x1, w1 // SQDECD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiWdI | sqdecd x11, w11, mul3 // SQDECD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiWdI | sqdecd x14, w14, vl2, mul #16 // SQDECD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x18 // SQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x11, vl5 // SQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x21, all, mul #13 // SQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECD_ZPiI | sqdecd z27.d // SQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECD_ZPiI | sqdecd z2.d, vl128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECD_ZPiI | sqdecd z23.d, vl1, mul #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x7, w7 // SQDECH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x10, w10, vl128 // SQDECH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x16, w16, vl6, mul #11 // SQDECH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x6 // SQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x17, vl128 // SQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x27, vl128, mul #4 // SQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECH_ZPiI | sqdech z16.h // SQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECH_ZPiI | sqdech z21.h, vl6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECH_ZPiI | sqdech z7.h, mul3, mul #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECP_XPWd_B | sqdecp x1, p4.b, w1 // SQDECP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECP_XP_D | sqdecp x26, p6.d // SQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | SQDECP_ZP_D | sqdecp z10.d, p3.d // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x13, w13 // SQDECW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x2, w2, pow2 // SQDECW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x26, w26, vl8, mul #10 // SQDECW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x10 // SQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x17, vl128 // SQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x13, mul4, mul #3 // SQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECW_ZPiI | sqdecw z7.s // SQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECW_ZPiI | sqdecw z10.s, pow2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECW_ZPiI | sqdecw z28.s, vl2, mul #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv1i32_indexed | sqdmlal s23, h16, v4.h[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv1i64_indexed | sqdmlal d12, s18, v3.s[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i16_indexed | sqdmlal v20.4s, v30.4h, v12.h[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv2i32_indexed | sqdmlal v11.2d, v24.2s, v0.s[3] // SQDMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv8i16_indexed | sqdmlal2 v2.4s, v17.8h, v5.h[6] // SQDMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i32_indexed | sqdmlal2 v23.2d, v30.4s, v6.s[0] // SQDMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALi32 | sqdmlal d16, s12, s15 // SQDMLAL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i16_v4i32 | sqdmlal v8.4s, v24.4h, v31.4h // SQDMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv8i16_v4i32 | sqdmlal2 v29.4s, v11.8h, v13.8h // SQDMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv1i32_indexed | sqdmlsl s26, h21, v11.h[1] // SQDMLSL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv1i64_indexed | sqdmlsl d6, s16, v3.s[1] // SQDMLSL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i16_indexed | sqdmlsl v4.4s, v22.4h, v13.h[2] // SQDMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv2i32_indexed | sqdmlsl v26.2d, v7.2s, v3.s[0] // SQDMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv8i16_indexed | sqdmlsl2 v2.4s, v28.8h, v4.h[6] // SQDMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i32_indexed | sqdmlsl2 v4.2d, v3.4s, v3.s[2] // SQDMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLi32 | sqdmlsl d13, s21, s8 // SQDMLSL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i16_v4i32 | sqdmlsl v11.4s, v19.4h, v5.4h // SQDMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv8i16_v4i32 | sqdmlsl2 v27.4s, v8.8h, v22.8h // SQDMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i16_indexed | sqdmulh h14, h17, v6.h[6] // SQDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i32_indexed | sqdmulh s19, s6, v6.s[3] // SQDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv4i16_indexed | sqdmulh v8.4h, v16.4h, v5.h[4] // SQDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv2i32_indexed | sqdmulh v16.2s, v24.2s, v7.s[2] // SQDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i16 | sqdmulh h26, h21, h17 // SQDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv2i32 | sqdmulh v20.2s, v11.2s, v29.2s // SQDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv1i32_indexed | sqdmull s25, h5, v1.h[3] // SQDMULL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv1i64_indexed | sqdmull d29, s23, v0.s[2] // SQDMULL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv4i16_indexed | sqdmull v8.4s, v19.4h, v1.h[2] // SQDMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv2i32_indexed | sqdmull v20.2d, v10.2s, v6.s[2] // SQDMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv8i16_indexed | sqdmull2 v10.4s, v25.8h, v0.h[7] // SQDMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv4i32_indexed | sqdmull2 v4.2d, v29.4s, v2.s[3] // SQDMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLi32 | sqdmull d19, s2, s0 // SQDMULL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv2i32_v2i64 | sqdmull v14.2d, v23.2s, v13.2s // SQDMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv8i16_v4i32 | sqdmull2 v12.4s, v11.8h, v1.8h // SQDMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x12, w12 // SQINCB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x1, w1, vl8 // SQINCB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x16, w16, vl2, mul #16 // SQINCB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiI | sqincb x5 // SQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiI | sqincb x4, vl6 // SQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiI | sqincb x30, all, mul #7 // SQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiWdI | sqincd x28, w28 // SQINCD <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiWdI | sqincd x16, w16, vl8 // SQINCD <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiWdI | sqincd x22, w22, vl6, mul #16 // SQINCD <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x10 // SQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x17, vl5 // SQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x13, vl64 // SQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCD_ZPiI | sqincd z24.d // SQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCD_ZPiI | sqincd z10.d, vl128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCD_ZPiI | sqincd z29.d, vl128, mul #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x28, w28 // SQINCH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x30, w30, vl1 // SQINCH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x16, w16, vl4, mul #2 // SQINCH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x23 // SQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x10, vl64 // SQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x16, pow2, mul #2 // SQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCH_ZPiI | sqinch z3.h // SQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCH_ZPiI | sqinch z23.h, vl4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCH_ZPiI | sqinch z6.h, vl128, mul #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCP_XPWd_H | sqincp x13, p2.h, w13 // SQINCP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCP_XP_H | sqincp x0, p7.h // SQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | SQINCP_ZP_H | sqincp z9.h, p1.h // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x24, w24 // SQINCW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x16, w16, mul4 // SQINCW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x27, w27, vl32, mul #15 // SQINCW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x29 // SQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x25, vl7 // SQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x21, vl8, mul #3 // SQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCW_ZPiI | sqincw z30.s // SQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCW_ZPiI | sqincw z8.s, mul3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCW_ZPiI | sqincw z0.s, vl5, mul #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQNEGv1i64 | sqneg d24, d22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQNEGv16i8 | sqneg v30.16b, v15.16b // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i16_indexed | sqrdmlah h14, h4, v6.h[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i32_indexed | sqrdmlah s24, s17, v6.s[2] // SQRDMLAH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv4i16_indexed | sqrdmlah v17.4h, v18.4h, v4.h[7] // SQRDMLAH <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv2i32_indexed | sqrdmlah v10.2s, v17.2s, v3.s[3] // SQRDMLAH <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i32 | sqrdmlah s3, s3, s5 // SQRDMLAH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv8i16 | sqrdmlah v16.8h, v30.8h, v28.8h // SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i16_indexed | sqrdmlsh h13, h26, v4.h[2] // SQRDMLSH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i32_indexed | sqrdmlsh s26, s29, v7.s[0] // SQRDMLSH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv8i16_indexed | sqrdmlsh v1.8h, v21.8h, v8.h[1] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i16_indexed | sqrdmlsh v8.4h, v11.4h, v1.h[3] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv2i32_indexed | sqrdmlsh v20.2s, v29.2s, v4.s[3] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i32_indexed | sqrdmlsh v21.4s, v9.4s, v1.s[0] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i32 | sqrdmlsh s30, s20, s13 // SQRDMLSH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i16 | sqrdmlsh v20.4h, v2.4h, v23.4h // SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i16_indexed | sqrdmulh h3, h25, v2.h[1] // SQRDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i32_indexed | sqrdmulh s9, s24, v4.s[3] // SQRDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv8i16_indexed | sqrdmulh v0.8h, v15.8h, v0.h[5] // SQRDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv2i32_indexed | sqrdmulh v6.2s, v29.2s, v4.s[2] // SQRDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i16 | sqrdmulh h5, h2, h20 // SQRDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv2i32 | sqrdmulh v31.2s, v17.2s, v4.2s // SQRDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHLv1i64 | sqrshl d6, d1, d30 // SQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHLv8i8 | sqrshl v15.8b, v26.8b, v21.8b // SQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNb | sqrshrn b6, h24, #3 // SQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNh | sqrshrn h11, s22, #8 // SQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNs | sqrshrn s4, d9, #13 // SQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv8i8_shift | sqrshrn v31.8b, v31.8h, #2 // SQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv4i16_shift | sqrshrn v27.4h, v11.4s, #8 // SQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv2i32_shift | sqrshrn v4.2s, v30.2d, #10 // SQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv16i8_shift | sqrshrn2 v11.16b, v30.8h, #7 // SQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv8i16_shift | sqrshrn2 v14.8h, v3.4s, #12 // SQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv4i32_shift | sqrshrn2 v13.4s, v28.2d, #24 // SQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNb | sqrshrun b5, h0, #3 // SQRSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNh | sqrshrun h25, s11, #7 // SQRSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNs | sqrshrun s15, d18, #2 // SQRSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv8i8_shift | sqrshrun v0.8b, v3.8h, #7 // SQRSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv4i16_shift | sqrshrun v5.4h, v8.4s, #7 // SQRSHRUN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv2i32_shift | sqrshrun v7.2s, v8.2d, #13 // SQRSHRUN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv16i8_shift | sqrshrun2 v14.16b, v14.8h, #3 // SQRSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv8i16_shift | sqrshrun2 v9.8h, v16.4s, #10 // SQRSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv4i32_shift | sqrshrun2 v12.4s, v23.2d, #30 // SQRSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLb | sqshl b15, b3, #4 // SQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLh | sqshl h21, h0, #5 // SQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLs | sqshl s26, s9, #24 // SQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLd | sqshl d8, d23, #17 // SQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv16i8_shift | sqshl v25.16b, v26.16b, #5 // SQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv4i16_shift | sqshl v29.4h, v1.4h, #7 // SQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv2i32_shift | sqshl v0.2s, v5.2s, #1 // SQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv2i64_shift | sqshl v11.2d, v2.2d, #23 // SQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv1i32 | sqshl s17, s4, s23 // SQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv16i8 | sqshl v23.16b, v23.16b, v23.16b // SQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUb | sqshlu b3, b27, #5 // SQSHLU B<d>, B<n>, #<shiftb> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUh | sqshlu h23, h4, #6 // SQSHLU H<d>, H<n>, #<shifth> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUs | sqshlu s29, s29, #30 // SQSHLU S<d>, S<n>, #<shifts> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUd | sqshlu d14, d5, #22 // SQSHLU D<d>, D<n>, #<shiftd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv8i8_shift | sqshlu v11.8b, v17.8b, #6 // SQSHLU <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv8i16_shift | sqshlu v18.8h, v8.8h, #14 // SQSHLU <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv4i32_shift | sqshlu v25.4s, v7.4s, #13 // SQSHLU <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv2i64_shift | sqshlu v19.2d, v14.2d, #39 // SQSHLU <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNb | sqshrn b17, h30, #7 // SQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNh | sqshrn h30, s15, #5 // SQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNs | sqshrn s16, d0, #20 // SQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv8i8_shift | sqshrn v3.8b, v25.8h, #1 // SQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv4i16_shift | sqshrn v23.4h, v14.4s, #6 // SQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv2i32_shift | sqshrn v6.2s, v29.2d, #10 // SQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv16i8_shift | sqshrn2 v31.16b, v31.8h, #8 // SQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv8i16_shift | sqshrn2 v13.8h, v6.4s, #13 // SQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv4i32_shift | sqshrn2 v30.4s, v0.2d, #1 // SQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNb | sqshrun b3, h16, #3 // SQSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNh | sqshrun h11, s10, #7 // SQSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNs | sqshrun s18, d1, #13 // SQSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv8i8_shift | sqshrun v21.8b, v27.8h, #5 // SQSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv4i16_shift | sqshrun v18.4h, v19.4s, #2 // SQSHRUN <Vd>.4H, <Vn>.4S, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv2i32_shift | sqshrun v2.2s, v14.2d, #3 // SQSHRUN <Vd>.2S, <Vn>.2D, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv16i8_shift | sqshrun2 v10.16b, v28.8h, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv8i16_shift | sqshrun2 v4.8h, v28.4s, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv4i32_shift | sqshrun2 v7.4s, v18.2d, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_B | sqsub z13.b, z13.b, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_H | sqsub z28.h, z28.h, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_S | sqsub z11.s, z11.s, #14 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZZZ_S | sqsub z28.s, z9.s, z12.s // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSUBv1i8 | sqsub b3, b13, b12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSUBv8i16 | sqsub v20.8h, v18.8h, v12.8h // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv1i8 | sqxtn b11, h22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv2i32 | sqxtn v3.2s, v17.2d // SQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv8i16 | sqxtn2 v17.8h, v27.4s // SQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv1i8 | sqxtun b30, h18 // SQXTUN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv8i8 | sqxtun v26.8b, v21.8h // SQXTUN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv16i8 | sqxtun2 v22.16b, v6.8h // SQXTUN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SRHADDv8i8 | srhadd v29.8b, v3.8b, v8.8b // SRHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRId | sri d30, d17, #61 // SRI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv16i8_shift | sri v23.16b, v30.16b, #2 // SRI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv4i16_shift | sri v1.4h, v0.4h, #4 // SRI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv2i32_shift | sri v28.2s, v6.2s, #16 // SRI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv2i64_shift | sri v8.2d, v19.2d, #40 // SRI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHLv1i64 | srshl d30, d8, d8 // SRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHLv8i8 | srshl v20.8b, v23.8b, v27.8b // SRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRd | srshr d20, d18, #27 // SRSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv8i8_shift | srshr v20.8b, v0.8b, #7 // SRSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv8i16_shift | srshr v27.8h, v19.8h, #9 // SRSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv2i32_shift | srshr v8.2s, v20.2s, #31 // SRSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv2i64_shift | srshr v31.2d, v17.2d, #33 // SRSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAd | srsra d13, d10, #25 // SRSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv16i8_shift | srsra v31.16b, v15.16b, #5 // SRSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv4i16_shift | srsra v14.4h, v27.4h, #7 // SRSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv2i32_shift | srsra v17.2s, v8.2s, #8 // SRSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv2i64_shift | srsra v22.2d, v4.2d, #12 // SRSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DSB | ssbb // SSBB \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLv1i64 | sshl d29, d30, d9 // SSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLv2i64 | sshl v13.2d, v7.2d, v27.2d // SSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v9.8h, v2.8b, #0 // SSHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i16_shift | sshll v12.4s, v3.4h, #4 // SSHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv2i32_shift | sshll v17.2d, v6.2s, #22 // SSHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv16i8_shift | sshll2 v28.8h, v12.16b, #7 // SSHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i16_shift | sshll2 v29.4s, v22.8h, #7 // SSHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v17.2d, v13.4s, #22 // SSHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRd | sshr d3, d18, #10 // SSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv8i8_shift | sshr v20.8b, v28.8b, #2 // SSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv4i16_shift | sshr v20.4h, v23.4h, #10 // SSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv2i32_shift | sshr v13.2s, v23.2s, #2 // SSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv2i64_shift | sshr v3.2d, v8.2d, #61 // SSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAd | ssra d28, d30, #51 // SSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv8i8_shift | ssra v9.8b, v18.8b, #2 // SSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv4i16_shift | ssra v21.4h, v24.4h, #3 // SSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv2i32_shift | ssra v28.2s, v17.2s, #6 // SSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv2i64_shift | ssra v0.2d, v23.2d, #35 // SSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBLv4i16_v4i32 | ssubl v13.4s, v9.4h, v5.4h // SSUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBLv8i16_v4i32 | ssubl2 v18.4s, v29.8h, v17.8h // SSUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBWv2i32_v2i64 | ssubw v5.2d, v13.2d, v4.2s // SSUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBWv8i16_v4i32 | ssubw2 v4.4s, v26.4s, v31.8h // SSUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b | st1 { v18.8b }, [x15] // ST1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b | st1 { v31.16b }, [x29] // ST1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h | st1 { v19.4h }, [x7] // ST1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h | st1 { v27.8h }, [x17] // ST1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s | st1 { v25.2s }, [x6] // ST1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s | st1 { v22.4s }, [x19] // ST1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d | st1 { v20.1d }, [x10] // ST1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d | st1 { v8.2d }, [x15] // ST1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v16.8b }, [x14], #8 // ST1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v10.16b }, [x8], #16 // ST1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v29.4h }, [x17], #8 // ST1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v14.8h }, [x28], #16 // ST1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v18.2s }, [x20], #8 // ST1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v28.4s }, [x1], #16 // ST1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v17.1d }, [x27], #8 // ST1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v30.2d }, [x4], #16 // ST1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v13.8b }, [x8], x7 // ST1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v4.16b }, [x7], x26 // ST1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v17.4h }, [x10], x4 // ST1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v18.8h }, [x15], x1 // ST1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v6.2s }, [x17], x24 // ST1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v26.4s }, [x20], x29 // ST1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v13.1d }, [x3], x20 // ST1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v15.2d }, [x21], x11 // ST1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b | st1 { v8.8b, v9.8b }, [x18] // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b | st1 { v1.16b, v2.16b }, [x4] // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h | st1 { v22.4h, v23.4h }, [x22] // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h | st1 { v18.8h, v19.8h }, [x2] // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s | st1 { v13.2s, v14.2s }, [x9] // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s | st1 { v15.4s, v16.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d | st1 { v21.1d, v22.1d }, [x29] // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d | st1 { v26.2d, v27.2d }, [x28] // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v23.8b, v24.8b }, [x4], #16 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v15.16b, v16.16b }, [x16], #32 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v7.4h, v8.4h }, [x7], #16 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v8.8h, v9.8h }, [x1], #32 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v23.2s, v24.2s }, [x7], #16 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v8.4s, v9.4s }, [x15], #32 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v14.1d, v15.1d }, [x11], #16 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v12.2d, v13.2d }, [x2], #32 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v3.8b, v4.8b }, [x28], x14 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v19.16b, v20.16b }, [x13], x7 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v28.4h, v29.4h }, [x14], x5 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v9.8h, v10.8h }, [x28], x9 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v10.2s, v11.2s }, [x10], x2 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v13.4s, v14.4s }, [x8], x15 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v5.1d, v6.1d }, [x9], x14 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v14.2d, v15.2d }, [x24], x1 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b | st1 { v15.8b, v16.8b, v17.8b }, [x0] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b | st1 { v27.16b, v28.16b, v29.16b }, [x18] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h | st1 { v13.4h, v14.4h, v15.4h }, [x7] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h | st1 { v8.8h, v9.8h, v10.8h }, [x16] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s | st1 { v12.2s, v13.2s, v14.2s }, [x3] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s | st1 { v19.4s, v20.4s, v21.4s }, [x7] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d | st1 { v5.1d, v6.1d, v7.1d }, [x3] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d | st1 { v13.2d, v14.2d, v15.2d }, [x27] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v3.8b, v4.8b, v5.8b }, [x21], #24 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v25.16b, v26.16b, v27.16b }, [x4], #48 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v24.4h, v25.4h, v26.4h }, [x9], #24 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v0.8h, v1.8h, v2.8h }, [x7], #48 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v3.2s, v4.2s, v5.2s }, [x4], #24 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v25.4s, v26.4s, v27.4s }, [x14], #48 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v7.1d, v8.1d, v9.1d }, [x13], #24 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v19.2d, v20.2d, v21.2d }, [x5], #48 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v5.8b, v6.8b, v7.8b }, [x17], x25 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v12.16b, v13.16b, v14.16b }, [x29], x23 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v18.4h, v19.4h, v20.4h }, [x0], x14 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v16.8h, v17.8h, v18.8h }, [x1], x18 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v1.2s, v2.2s, v3.2s }, [x15], x29 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v2.4s, v3.4s, v4.4s }, [x29], x6 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v8.1d, v9.1d, v10.1d }, [x13], x27 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v8.2d, v9.2d, v10.2d }, [x18], x19 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x14] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b | st1 { v18.16b, v19.16b, v20.16b, v21.16b }, [x29] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h | st1 { v23.4h, v24.4h, v25.4h, v26.4h }, [x24] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h | st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x19] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s | st1 { v6.2s, v7.2s, v8.2s, v9.2s }, [x13] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s | st1 { v26.4s, v27.4s, v28.4s, v29.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d | st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x10] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d | st1 { v25.2d, v26.2d, v27.2d, v28.2d }, [x19] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v27.8b, v28.8b, v29.8b, v30.8b }, [x17], #32 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x0], #64 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v18.4h, v19.4h, v20.4h, v21.4h }, [x22], #32 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v12.8h, v13.8h, v14.8h, v15.8h }, [x13], #64 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v13.2s, v14.2s, v15.2s, v16.2s }, [x25], #32 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x11], #64 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x13], #32 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v12.2d, v13.2d, v14.2d, v15.2d }, [x25], #64 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x25], x28 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x24], x5 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x25], x19 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v20.8h, v21.8h, v22.8h, v23.8h }, [x18], x0 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v4.2s, v5.2s, v6.2s, v7.2s }, [x9], x5 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x12], x30 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v23.1d, v24.1d, v25.1d, v26.1d }, [x23], x4 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v20.2d, v21.2d, v22.2d, v23.2d }, [x7], x14 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8 | st1 { v1.b }[5], [x1] // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16 | st1 { v0.h }[2], [x1] // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32 | st1 { v31.s }[1], [x16] // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64 | st1 { v15.d }[1], [x8] // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, D \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v15.b }[1], [x12], #1 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v16.b }[3], [x0], x2 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v29.h }[2], [x27], #2 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v15.h }[4], [x30], x9 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v3.s }[1], [x24], #4 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v26.s }[0], [x2], x30 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v19.d }[1], [x9], #8 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v29.d }[0], [x26], x22 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_H_IMM | st1b { z7.h }, p2, [x14] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_S_IMM | st1b { z16.s }, p4, [x20, #3, mul vl] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_S | st1b { z17.s }, p3, [x20, x0] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D_UXTW | st1b { z0.d }, p4, [x11, z13.d, uxtw] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1B_S_SXTW | st1b { z16.s }, p4, [x19, z25.s, sxtw] // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D | st1b { z10.d }, p3, [x12, z21.d] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1B_S_IMM | st1b { z17.s }, p7, [z28.s] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1B_S_IMM | st1b { z16.s }, p0, [z25.s, #7] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D_IMM | st1b { z15.d }, p6, [z27.d] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D_IMM | st1b { z2.d }, p0, [z21.d, #24] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_SXTW_SCALED | st1d { z10.d }, p2, [x26, z5.d, sxtw #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_UXTW | st1d { z18.d }, p2, [x7, z1.d, uxtw] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_SCALED | st1d { z9.d }, p6, [x6, z12.d, lsl #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D | st1d { z3.d }, p3, [x1, z30.d] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_IMM | st1d { z18.d }, p0, [z7.d] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_IMM | st1d { z4.d }, p2, [z2.d, #136] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1H_S_IMM | st1h { z28.s }, p3, [x18] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1H_IMM | st1h { z23.h }, p1, [x14, #-8, mul vl] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | ST1H_S | st1h { z25.s }, p3, [x17, x8, lsl #1] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_SXTW_SCALED | st1h { z12.s }, p3, [x24, z30.s, sxtw #1] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_UXTW_SCALED | st1h { z26.d }, p5, [x9, z17.d, uxtw #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_SXTW | st1h { z23.d }, p1, [x5, z25.d, sxtw] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_SXTW | st1h { z14.s }, p4, [x22, z17.s, sxtw] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_SCALED | st1h { z23.d }, p3, [x25, z11.d, lsl #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D | st1h { z0.d }, p4, [x21, z21.d] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_IMM | st1h { z29.s }, p5, [z9.s] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_IMM | st1h { z4.s }, p7, [z23.s, #40] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_IMM | st1h { z27.d }, p2, [z3.d] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_IMM | st1h { z11.d }, p6, [z7.d, #38] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_SXTW_SCALED | st1w { z25.s }, p1, [x9, z28.s, sxtw #2] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_SXTW_SCALED | st1w { z13.d }, p3, [x16, z9.d, sxtw #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_SXTW | st1w { z21.d }, p1, [x24, z23.d, sxtw] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_UXTW | st1w { z17.s }, p1, [x5, z22.s, uxtw] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_SCALED | st1w { z28.d }, p1, [x5, z8.d, lsl #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D | st1w { z26.d }, p3, [x3, z0.d] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_IMM | st1w { z28.s }, p6, [z21.s] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_IMM | st1w { z26.s }, p3, [z24.s, #120] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_IMM | st1w { z3.d }, p0, [z12.d] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_IMM | st1w { z17.d }, p2, [z1.d, #80] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b | st2 { v14.8b, v15.8b }, [x2] // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b | st2 { v6.16b, v7.16b }, [x23] // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h | st2 { v10.4h, v11.4h }, [x18] // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h | st2 { v10.8h, v11.8h }, [x18] // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s | st2 { v25.2s, v26.2s }, [x29] // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s | st2 { v26.4s, v27.4s }, [x14] // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d | st2 { v10.2d, v11.2d }, [x1] // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v21.8b, v22.8b }, [x22], #16 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v26.16b, v27.16b }, [x2], #32 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v19.4h, v20.4h }, [x27], #16 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v28.8h, v29.8h }, [x22], #32 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v1.2s, v2.2s }, [x26], #16 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v19.4s, v20.4s }, [x7], #32 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v22.2d, v23.2d }, [x18], #32 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v29.8b, v30.8b }, [x9], x2 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v17.16b, v18.16b }, [x4], x0 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v9.4h, v10.4h }, [x7], x25 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v8.8h, v9.8h }, [x11], x8 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v17.2s, v18.2s }, [x2], x8 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v9.4s, v10.4s }, [x23], x12 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v29.2d, v30.2d }, [x25], x11 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8 | st2 { v21.b, v22.b }[15], [x15] // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16 | st2 { v28.h, v29.h }[2], [x6] // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32 | st2 { v14.s, v15.s }[1], [x25] // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64 | st2 { v17.d, v18.d }[1], [x1] // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, D \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v9.b, v10.b }[15], [x12], #2 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v19.b, v20.b }[9], [x27], x28 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v18.h, v19.h }[3], [x30], #4 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v13.h, v14.h }[5], [x23], x24 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v23.s, v24.s }[1], [x22], #8 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v16.s, v17.s }[3], [x12], x16 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v27.d, v28.d }[0], [x16], #16 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v6.d, v7.d }[1], [x14], x5 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2B_IMM | st2b { z19.b, z20.b }, p1, [x18] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2B_IMM | st2b { z26.b, z27.b }, p7, [x15, #-6, mul vl] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2B | st2b { z19.b, z20.b }, p1, [x23, x27] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2D_IMM | st2d { z29.d, z30.d }, p4, [x8] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2D_IMM | st2d { z16.d, z17.d }, p3, [x20, #14, mul vl] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2D | st2d { z17.d, z18.d }, p7, [x2, x28, lsl #3] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2H_IMM | st2h { z5.h, z6.h }, p7, [x23] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2H_IMM | st2h { z11.h, z12.h }, p6, [x4, #10, mul vl] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI[2], V1UnitL[2], V1UnitL01[2], V1UnitS[2], V1UnitV[2] | ST2H | st2h { z3.h, z4.h }, p3, [x22, x16, lsl #1] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 3 4 4 1.0 V1UnitL01[2],V1UnitS[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2W_IMM | st2w { z14.s, z15.s }, p4, [x17] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2W_IMM | st2w { z9.s, z10.s }, p5, [x19, #-8, mul vl] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2W | st2w { z5.s, z6.s }, p3, [x23, x13, lsl #2] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b | st3 { v10.8b, v11.8b, v12.8b }, [x18] // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b | st3 { v26.16b, v27.16b, v28.16b }, [x4] // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h | st3 { v25.4h, v26.4h, v27.4h }, [x11] // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h | st3 { v0.8h, v1.8h, v2.8h }, [x0] // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s | st3 { v19.2s, v20.2s, v21.2s }, [x30] // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s | st3 { v24.4s, v25.4s, v26.4s }, [x8] // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d | st3 { v24.2d, v25.2d, v26.2d }, [x25] // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, D \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v25.8b, v26.8b, v27.8b }, [x23], #24 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x26], #48 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v24.4h, v25.4h, v26.4h }, [x3], #24 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v23.8h, v24.8h, v25.8h }, [x22], #48 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v7.2s, v8.2s, v9.2s }, [x8], #24 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v11.4s, v12.4s, v13.4s }, [x15], #48 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v1.2d, v2.2d, v3.2d }, [x4], #48 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v16.8b, v17.8b, v18.8b }, [x26], x2 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x3], x18 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v2.4h, v3.4h, v4.4h }, [x4], x4 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v27.8h, v28.8h, v29.8h }, [x27], x8 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v26.2s, v27.2s, v28.2s }, [x2], x25 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v5.4s, v6.4s, v7.4s }, [x18], x29 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v26.2d, v27.2d, v28.2d }, [x14], x5 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8 | st3 { v8.b, v9.b, v10.b }[4], [x18] // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16 | st3 { v11.h, v12.h, v13.h }[4], [x0] // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32 | st3 { v9.s, v10.s, v11.s }[2], [x20] // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64 | st3 { v16.d, v17.d, v18.d }[0], [x13] // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v26.b, v27.b, v28.b }[1], [x12], #3 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v27.b, v28.b, v29.b }[15], [x19], x23 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v24.h, v25.h, v26.h }[2], [x14], #6 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v1.h, v2.h, v3.h }[2], [x0], x23 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v25.s, v26.s, v27.s }[2], [x10], #12 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v8.s, v9.s, v10.s }[0], [x11], x20 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v19.d, v20.d, v21.d }[1], [x5], #24 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v10.d, v11.d, v12.d }[0], [x12], x11 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3B_IMM | st3b { z0.b - z2.b }, p6, [x26] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3B_IMM | st3b { z22.b - z24.b }, p6, [x25, #3, mul vl] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3B | st3b { z14.b - z16.b }, p2, [x29, x27] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3D_IMM | st3d { z6.d - z8.d }, p2, [x12] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3D_IMM | st3d { z20.d - z22.d }, p5, [x15, #9, mul vl] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3D | st3d { z15.d - z17.d }, p7, [x0, x9, lsl #3] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3H_IMM | st3h { z17.h - z19.h }, p3, [x14] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3H_IMM | st3h { z21.h - z23.h }, p0, [x15, #6, mul vl] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3H | st3h { z2.h - z4.h }, p3, [x21, x9, lsl #1] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3W_IMM | st3w { z9.s - z11.s }, p3, [x29] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3W_IMM | st3w { z11.s - z13.s }, p4, [x13, #15, mul vl] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3W | st3w { z19.s - z21.s }, p2, [x22, x28, lsl #2] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b | st4 { v17.8b, v18.8b, v19.8b, v20.8b }, [x8] // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b | st4 { v7.16b, v8.16b, v9.16b, v10.16b }, [x15] // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h | st4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x13] // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h | st4 { v11.8h, v12.8h, v13.8h, v14.8h }, [x1] // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s | st4 { v15.2s, v16.2s, v17.2s, v18.2s }, [x18] // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x6] // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+# CHECK-NEXT: 2 | 4 | 4 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d | st4 { v25.2d, v26.2d, v27.2d, v28.2d }, [x16] // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, D \\ 2 4 4 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b_POST | st4 { v16.8b, v17.8b, v18.8b, v19.8b }, [x24], #32 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x13], #64 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h_POST | st4 { v17.4h, v18.4h, v19.4h, v20.4h }, [x3], #32 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h_POST | st4 { v18.8h, v19.8h, v20.8h, v21.8h }, [x5], #64 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s_POST | st4 { v26.2s, v27.2s, v28.2s, v29.2s }, [x17], #32 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s_POST | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x7], #64 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 0.25 | V1UnitI, V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d_POST | st4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x25], #64 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b_POST | st4 { v24.8b, v25.8b, v26.8b, v27.8b }, [x24], x8 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x21], x21 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h_POST | st4 { v11.4h, v12.4h, v13.4h, v14.4h }, [x29], x3 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h_POST | st4 { v16.8h, v17.8h, v18.8h, v19.8h }, [x13], x3 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s_POST | st4 { v13.2s, v14.2s, v15.2s, v16.2s }, [x0], x0 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s_POST | st4 { v26.4s, v27.4s, v28.4s, v29.4s }, [x1], x22 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 0.25 | V1UnitI, V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d_POST | st4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x10], x28 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i8 | st4 { v10.b, v11.b, v12.b, v13.b }[3], [x5] // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i16 | st4 { v5.h, v6.h, v7.h, v8.h }[4], [x13] // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i32 | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x7] // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, S \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64 | st4 { v23.d, v24.d, v25.d, v26.d }[1], [x5] // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i8_POST | st4 { v22.b, v23.b, v24.b, v25.b }[0], [x29], #4 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i8_POST | st4 { v6.b, v7.b, v8.b, v9.b }[9], [x26], x21 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i16_POST | st4 { v19.h, v20.h, v21.h, v22.h }[2], [x18], #8 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i16_POST | st4 { v6.h, v7.h, v8.h, v9.h }[4], [x9], x9 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i32_POST | st4 { v19.s, v20.s, v21.s, v22.s }[2], [x27], #16 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i32_POST | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x29], x21 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x16], #32 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x12], x11 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4B_IMM | st4b { z22.b - z25.b }, p0, [x0] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4B_IMM | st4b { z1.b - z4.b }, p7, [x1, #20, mul vl] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4B | st4b { z28.b - z31.b }, p4, [x27, x20] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4D_IMM | st4d { z19.d - z22.d }, p1, [x11] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4D_IMM | st4d { z0.d - z3.d }, p6, [x7, #-24, mul vl] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4D | st4d { z28.d - z31.d }, p5, [x19, x20, lsl #3] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4H_IMM | st4h { z14.h - z17.h }, p1, [x24] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4H_IMM | st4h { z27.h - z30.h }, p3, [x26, #16, mul vl] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4H | st4h { z2.h - z5.h }, p5, [x30, x17, lsl #1] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4W_IMM | st4w { z3.s - z6.s }, p0, [x0] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4W_IMM | st4w { z5.s - z8.s }, p2, [x0, #-20, mul vl] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4W | st4w { z21.s - z24.s }, p5, [x5, x18, lsl #2] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRB | stlrb w19, [x26] // STLRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRB | stlrb w9, [x19] // STLRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRH | stlrh w4, [x7] // STLRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRH | stlrh w20, [x5] // STLRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURWi | stlur w3, [x27] // STLUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURWi | stlur w0, [x15, #-14] // STLUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURXi | stlur x23, [x25] // STLUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURXi | stlur x18, [x6, #101] // STLUR <Xt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURBi | stlurb w30, [x17] // STLURB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURBi | stlurb w25, [x21, #-8] // STLURB <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURHi | stlurh w9, [x29] // STLURH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLURHi | stlurh w6, [x27, #-224] // STLURH <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPW | stlxp w26, w11, w12, [x7] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPW | stlxp w24, w10, w16, [x8] // STLXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPX | stlxp w1, x25, x26, [x10] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXPX | stlxp w10, x7, x20, [x22] // STLXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRW | stlxr w23, w8, [x6] // STLXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRW | stlxr w29, w28, [x26] // STLXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRX | stlxr w23, x8, [x7] // STLXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRX | stlxr w14, x18, [x23] // STLXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRB | stlxrb w2, w7, [x10] // STLXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRB | stlxrb w0, w1, [x20] // STLXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRH | stlxrh w16, w17, [x21] // STLXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRH | stlxrh w12, w26, [x23] // STLXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPSi | stnp s29, s16, [x11] // STNP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPSi | stnp s17, s19, [x27, #-40] // STNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPDi | stnp d4, d3, [x30] // STNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPDi | stnp d25, d31, [x28, #328] // STNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPQi | stnp q28, q22, [x3] // STNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPQi | stnp q17, q15, [x16, #656] // STNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPWi | stnp w29, w25, [x5] // STNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPWi | stnp w16, w18, [x27, #-232] // STNP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPXi | stnp x20, x16, [x8] // STNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPXi | stnp x6, x20, [x15, #-120] // STNP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRI | stnt1b { z18.b }, p7, [x21] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRI | stnt1b { z9.b }, p6, [x26, #-7, mul vl] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRR | stnt1b { z18.b }, p1, [x1, x20] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRI | stnt1d { z16.d }, p3, [x3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRI | stnt1d { z27.d }, p4, [x16, #-6, mul vl] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRR | stnt1d { z11.d }, p0, [x18, x22, lsl #3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1H_ZRI | stnt1h { z27.h }, p5, [x16] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1H_ZRI | stnt1h { z2.h }, p2, [x30, #-8, mul vl] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | STNT1H_ZRR | stnt1h { z0.h }, p1, [x7, x1, lsl #1] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRI | stnt1w { z9.s }, p3, [x20] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRI | stnt1w { z12.s }, p4, [x11, #-6, mul vl] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRR | stnt1w { z28.s }, p6, [x6, x0, lsl #2] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSpost | stp s10, s19, [x13], #76 // STP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Store vector pair, immed post-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDpost | stp d19, d20, [x30], #-144 // STP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Store vector pair, immed post-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | STPQpost | stp q3, q17, [x14], #-976 // STP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Store vector pair, immed post-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSpre | stp s19, s24, [x27, #-224]! // STP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Store vector pair, immed pre-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDpre | stp d16, d21, [x28, #168]! // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Store vector pair, immed pre-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | STPQpre | stp q10, q31, [x0, #608]! // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Store vector pair, immed pre-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSi | stp s27, s11, [x30] // STP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDi | stp d30, d19, [x25] // STP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPQi | stp q25, q3, [x27] // STP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSi | stp s29, s13, [x0, #-44] // STP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDi | stp d15, d12, [x20, #-72] // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPQi | stp q13, q16, [x3, #320] // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPWpost | stp w18, w8, [x6], #196 // STP <Wt1>, <Wt2>, [<Xn|SP>], #<imms> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPXpost | stp x10, x17, [x7], #-328 // STP <Xt1>, <Xt2>, [<Xn|SP>], #<immd> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPWpre | stp w4, w3, [x0, #-36]! // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPXpre | stp x14, x13, [x24, #-272]! // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPWi | stp w27, w30, [x20] // STP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPXi | stp x3, x6, [x16] // STP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPWi | stp w9, w14, [x10, #-24] // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STPXi | stp x27, x4, [x14, #-448] // STP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRWpost | str w14, [x2], #-72 // STR <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXpost | str x28, [x14], #-130 // STR <Xt>, [<Xn|SP>], #<simm> \\ Store register, immed post-index \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRWpre | str w9, [x29, #-227]! // STR <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXpre | str x13, [x5, #233]! // STR <Xt>, [<Xn|SP>, #<simm>]! \\ Store register, immed pre-index \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWui | str w2, [x30] // STR <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWui | str w28, [x2, #1796] // STR <Wt>, [<Xn|SP>, #<pimm32>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXui | str x22, [x29] // STR <Xt>, [<Xn|SP>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXui | str x2, [x10, #9472] // STR <Xt>, [<Xn|SP>, #<pimm64>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBpost | str b21, [x28], #-62 // STR <Bt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHpost | str h13, [x10], #-194 // STR <Ht>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSpost | str s14, [x8], #166 // STR <St>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDpost | str d24, [x10], #134 // STR <Dt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQpost | str q20, [x30], #-108 // STR <Qt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBpre | str b9, [x24, #242]! // STR <Bt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHpre | str h0, [x4, #-193]! // STR <Ht>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSpre | str s19, [x23, #115]! // STR <St>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDpre | str d20, [x2, #-30]! // STR <Dt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQpre | str q24, [x20, #62]! // STR <Qt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBui | str b5, [x11] // STR <Bt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBui | str b20, [x23, #2409] // STR <Bt>, [<Xn|SP>, #<pimm8>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHui | str h23, [x15] // STR <Ht>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHui | str h24, [x6, #492] // STR <Ht>, [<Xn|SP>, #<pimm16>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSui | str s25, [x19] // STR <St>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSui | str s2, [x14, #984] // STR <St>, [<Xn|SP>, #<pimm32>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDui | str d15, [x2] // STR <Dt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDui | str d27, [x7, #25704] // STR <Dt>, [<Xn|SP>, #<pimm64>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQui | str q13, [x16] // STR <Qt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQui | str q4, [x7, #96] // STR <Qt>, [<Xn|SP>, #<pimm128>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitL, V1UnitL01 | STR_PXI | str p4, [x5] // STR <Pt>, [<Xn|SP>] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitL, V1UnitL01 | STR_PXI | str p3, [x21, #-78, mul vl] // STR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w14, [x9, x17] // STR <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x5, [x0, x22] // STR <Xt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w24, [x21, w29, uxtw] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x27, [x26, w24, uxtw] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w28, [x29, w29, sxtw] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x25, [x1, w24, sxtw] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w2, [x24, x12, sxtx] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x3, [x24, x27, sxtx] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w29, [x30, w30, uxtw #2] // STR <Wt>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x5, [x13, w8, uxtw #3] // STR <Xt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroW | str w28, [x7, w24, sxtw #2] // STR <Wt>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroW | str x23, [x2, w26, sxtw #3] // STR <Xt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w11, [x8, x30, sxtx #2] // STR <Wt>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x20, [x4, x2, sxtx #3] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w8, [x11, x10, lsl #2] // STR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x27, [x2, x11, lsl #3] // STR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroX | str b14, [x13, x25] // STR <Bt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroW | str b30, [x16, w26, uxtw] // STR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroW | str b20, [x19, w3, sxtw] // STR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroX | str b13, [x29, x19, sxtx] // STR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h16, [x5, x24] // STR <Ht>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h15, [x15, w15, uxtw] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h3, [x6, w15, sxtw] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h2, [x1, x28, sxtx] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h30, [x29, w30, uxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h10, [x21, w11, sxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h0, [x15, x9, sxtx #1] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h13, [x0, x26, lsl #1] // STR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Store vector reg, register offset, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s2, [x16, x17] // STR <St>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s20, [x24, w10, uxtw] // STR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s4, [x9, w14, sxtw] // STR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s3, [x23, x26, sxtx] // STR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s27, [x17, w9, uxtw #2] // STR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s0, [x11, w20, sxtw #2] // STR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s20, [x17, x14, sxtx #2] // STR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s0, [x15, x28, lsl #2] // STR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d5, [x26, x6] // STR <Dt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d11, [x9, w5, uxtw] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d16, [x20, w8, sxtw] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d0, [x12, x9, sxtx] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d4, [x21, w25, uxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d28, [x20, w4, sxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d20, [x13, x23, sxtx #3] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d31, [x19, x28, lsl #3] // STR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q13, [x24, x1] // STR <Qt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q1, [x25, w9, uxtw] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q25, [x20, w15, sxtw] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q25, [x0, x15, sxtx] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q6, [x13, w0, uxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q27, [x4, w15, sxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q3, [x23, x0, sxtx #4] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q27, [x1, x28, lsl #4] // STR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Store vector reg, register offset, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STR_ZXI | str z3, [x0] // STR <Zt>, [<Xn|SP>] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STR_ZXI | str z8, [x6, #188, mul vl] // STR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRBBpost | strb w23, [x11], #34 // STRB <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRBBpre | strb w5, [x19, #-175]! // STRB <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBui | strb w18, [x30] // STRB <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBui | strb w12, [x9, #2315] // STRB <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroW | strb w5, [x26, w7, uxtw] // STRB <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroW | strb w18, [x2, w28, sxtw] // STRB <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroX | strb w21, [x21, x7, sxtx] // STRB <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBroX | strb w9, [x6, x21] // STRB <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHpost | strh w21, [x8], #192 // STRH <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHpre | strh w8, [x26, #-204]! // STRH <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHui | strh w6, [x7] // STRH <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHui | strh w0, [x19, #7514] // STRH <Wt>, [<Xn|SP>, #<pimm>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroX | strh w12, [x0, x11] // STRH <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroW | strh w5, [x18, w8, uxtw] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroW | strh w28, [x29, w0, sxtw] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRHHroX | strh w7, [x17, x0, sxtx] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX] \\ Store register, register offset, extend \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHroW | strh w7, [x2, w14, uxtw #1] // STRH <Wt>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHroW | strh w7, [x16, w29, sxtw #1] // STRH <Wt>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store register, register offset, extend, scale by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHroX | strh w5, [x1, x13, sxtx #1] // STRH <Wt>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store register, register offset, extend, scale by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRHHroX | strh w14, [x28, x2, lsl #1] // STRH <Wt>, [<Xn|SP>, <Xm>, LSL #1] \\ Store register, register offset, scaled by 1 \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRWi | sttr w17, [x20] // STTR <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRWi | sttr w14, [x30, #-35] // STTR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRXi | sttr x10, [x16] // STTR <Xt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRXi | sttr x16, [x8, #-25] // STTR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRBi | sttrb w13, [x2] // STTRB <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRBi | sttrb w0, [x20, #-114] // STTRB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRHi | sttrh w26, [x11] // STTRH <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRHi | sttrh w11, [x30, #-78] // STTRH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURBi | stur b29, [x8] // STUR <Bt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURBi | stur b5, [x0, #80] // STUR <Bt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x15] // STUR <Ht>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x12, #-227] // STUR <Ht>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURSi | stur s10, [x4] // STUR <St>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURSi | stur s9, [x14, #21] // STUR <St>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURDi | stur d1, [x28] // STUR <Dt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURDi | stur d6, [x6, #188] // STUR <Dt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURQi | stur q6, [x16] // STUR <Qt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURQi | stur q5, [x13, #-253] // STUR <Qt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURWi | stur w29, [x27] // STUR <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURWi | stur w14, [x2, #-34] // STUR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURXi | stur x29, [x10] // STUR <Xt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURXi | stur x30, [x25, #127] // STUR <Xt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURBBi | sturb w21, [x5] // STURB <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURBBi | sturb w25, [x26, #-117] // STURB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURHHi | sturh w0, [x11] // STURH <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURHHi | sturh w7, [x10, #-209] // STURH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPW | stxp w29, w24, w6, [x9] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPW | stxp w26, w19, w22, [x11] // STXP <Ws>, <Wt1>, <Wt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPX | stxp w30, x6, x3, [x1] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXPX | stxp w7, x2, x10, [x25] // STXP <Ws>, <Xt1>, <Xt2>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRW | stxr w19, w21, [x9] // STXR <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRW | stxr w25, w1, [x24] // STXR <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRX | stxr w25, x30, [x28] // STXR <Ws>, <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRX | stxr w30, x20, [x23] // STXR <Ws>, <Xt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRB | stxrb w0, w26, [x10] // STXRB <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRB | stxrb w10, w16, [x25] // STXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRH | stxrh w0, w20, [x8] // STXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
+# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STXRH | stxrh w12, w14, [x1] // STXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrx | sub w13, wsp, w10 // SUB <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrx | sub w22, wsp, w13, uxtb // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrx | sub w18, wsp, w23, sxtb #1 // SUB <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrx | sub w13, wsp, w8, lsl #4 // SUB <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x6, x8, x22 // SUB <Xd>, <Xn|SP>, X<m> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrx | sub x16, x2, w19, uxtb // SUB <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrx | sub x16, x3, w27, uxtb #2 // SUB <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x4, x13, x16, lsl #3 // SUB <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWri | sub wsp, wsp, #50 // SUB <Wd|WSP>, <Wn|WSP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWri | sub wsp, wsp, #84, lsl #12 // SUB <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXri | sub x18, x22, #36 // SUB <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXri | sub x17, x20, #184 // SUB <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_B | sub z18.b, z18.b, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_S | sub z22.s, z22.s, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_H | sub z15.h, z15.h, #50176 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrs | sub w0, w21, w2, lsl #4 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | sub w22, w7, w13, lsl #19 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | sub w1, w18, w16, asr #4 // SUB <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x27, x29, x16, lsl #1 // SUB <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | sub x24, x10, x15, lsl #35 // SUB <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | sub x24, x19, x13, lsr #20 // SUB <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBv1i64 | sub d18, d25, d0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBv2i32 | sub v15.2s, v14.2s, v11.2s // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZPmZ_H | sub z18.h, p4/m, z18.h, z7.h // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZZZ_B | sub z29.b, z19.b, z8.b // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBHNv4i32_v4i16 | subhn v7.4h, v10.4s, v13.4s // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBHNv2i64_v4i32 | subhn2 v24.4s, v24.2d, v8.2d // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_B | subr z13.b, z13.b, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_S | subr z17.s, z17.s, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_D | subr z15.d, z15.d, #100 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZPmZ_D | subr z21.d, p7/m, z21.d, z24.d // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w25, wsp, w13 // SUBS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w10, wsp, w9, uxth // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w20, wsp, w3, sxth #2 // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w12, wsp, w27, lsl #4 // SUBS <Wd>, <Wn|WSP>, <Wm>, LSL #<amount> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x16, x20, x21 // SUBS <Xd>, <Xn|SP>, X<m> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrx | subs x15, x2, w11, uxtb // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrx64 | subs x13, x15, x14, sxtx #1 // SUBS <Xd>, <Xn|SP>, <R><m>, <extend> #<amount> \\ ALU, flagset, extend and shift \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x30, x1, x26, lsl #3 // SUBS <Xd>, <Xn|SP>, X<m>, LSL #<amount> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWri | subs w25, wsp, #239 // SUBS <Wd>, <Wn|WSP>, #<imm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWri | subs w13, wsp, #75, lsl #12 // SUBS <Wd>, <Wn|WSP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXri | subs x9, x3, #173 // SUBS <Xd>, <Xn|SP>, #<imm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXri | subs x30, x25, #82, lsl #12 // SUBS <Xd>, <Xn|SP>, #<imm>, <shift> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | subs w16, w27, w25 // SUBS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | subs w0, w30, w27, lsl #4 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | subs w17, w27, w3, lsl #20 // SUBS <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | subs w27, w7, w27, asr #5 // SUBS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x21, x22, x17 // SUBS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x18, x1, x5 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | subs x28, x26, x4, lsl #49 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | subs x26, x14, x30, lsr #35 // SUBS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SUDOTlanev8i8 | sudot v4.2s, v20.8b, v18.4b[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV[2] | SUDOT_ZZZI | sudot z5.s, z30.b, z3.b[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUNPKHI_ZZ_D | sunpkhi z22.d, z16.s // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUNPKLO_ZZ_H | sunpklo z10.h, z0.b // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUQADDv1i8 | suqadd b15, b21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUQADDv16i8 | suqadd v26.16b, v27.16b // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SVC | svc #0x89cb // SVC #<imm> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sxtb w7, w20 // SXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxtb x18, w14 // SXTB <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTB_ZPmZ_H | sxtb z16.h, p5/m, z15.h // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTH_ZPmZ_S | sxth z4.s, p7/m, z11.s // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTW_ZPmZ_D | sxtw z12.d, p1/m, z16.d // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sxth w23, w2 // SXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxth x22, w17 // SXTH <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v4.8h, v21.8b, #0 // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v20.2d, v30.4s, #0 // SXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxtw x18, w22 // SXTW <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | sys #6, c6, c0, #3 // SYS #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | sys #7, c12, c5, #3, x8 // SYS #<op1>, <Cn>, <Cm>, #<op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSLxt | sysl x16, #5, c11, c8, #5 // SYSL <Xt>, #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | TBLv8i8Two | tbl v7.8b, { v2.16b, v3.16b }, v17.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBLv16i8Three | tbl v3.16b, { v10.16b, v11.16b, v12.16b }, v29.16b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 3 table regs \\ 1 4 4 1.0 V1UnitV01[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.67 | V1UnitV[3], V1UnitV01[3] | TBLv8i8Four | tbl v9.8b, { v22.16b, v23.16b, v24.16b, v25.16b }, v14.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 4 table regs \\ 1 4 4 0.67 V1UnitV01[3]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | TBLv16i8One | tbl v29.16b, { v3.16b }, v17.16b // TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBNZW | tbnz w3, #28, test // TBNZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBNZX | tbnz x30, #48, test // TBNZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBXv8i8Two | tbx v25.8b, { v13.16b, v14.16b }, v30.8b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 2 table reg \\ 1 4 4 1.0 V1UnitV01[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.67 | V1UnitV[3], V1UnitV01[3] | TBXv16i8Three | tbx v22.16b, { v3.16b, v4.16b, v5.16b }, v25.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 3 table reg \\ 1 6 6 0.67 V1UnitV01[3]
+# CHECK-NEXT: 1 | 6 | 6 | 0.40 | V1UnitV[5], V1UnitV01[5] | TBXv16i8Four | tbx v23.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v26.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 4 table reg \\ 1 6 6 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | TBXv8i8One | tbx v16.8b, { v21.16b }, v18.8b // TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 1 table reg \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBZW | tbz w17, #16, test // TBZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBZX | tbz x22, #41, test // TBZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | tlbi vmalle1 // TLBI <tlbi_op> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | tlbi ipas2e1is, x7 // TLBI <tlbi_op2>, <Xt> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | TRN1v2i32 | trn1 v30.2s, v21.2s, v25.2s // TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | TRN1_PPP_S | trn1 p1.s, p4.s, p0.s // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | TRN2_PPP_H | trn2 p0.h, p5.h, p7.h // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | TRN2v2i64 | trn2 v27.2d, v29.2d, v10.2d // TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWri | tst w25, #0xe00 // TST <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXri | tst x3, #0x1e00 // TST <Xn>, #<immd> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWrs | tst w9, w14 // TST <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | tst w10, w3, asr #16 // TST <Wn>, <Wm>, <shift> #<wamount> \\ Test/Compare, shift by immed \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXrs | tst x11, x28 // TST <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | tst x9, x7, asr #33 // TST <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABAv16i8 | uaba v13.16b, v14.16b, v19.16b // UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABALv2i32_v2i64 | uabal v13.2d, v16.2s, v11.2s // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABALv8i16_v4i32 | uabal2 v17.4s, v0.8h, v1.8h // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDv4i32 | uabd v23.4s, v4.4s, v30.4s // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UABD_ZPmZ_B | uabd z5.b, p5/m, z5.b, z10.b // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDLv4i16_v4i32 | uabdl v13.4s, v26.4h, v7.4h // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDLv4i32_v2i64 | uabdl2 v15.2d, v9.4s, v10.4s // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UADALPv2i32_v1i64 | uadalp v31.1d, v14.2s // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLv8i8_v8i16 | uaddl v29.8h, v8.8b, v31.8b // UADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLv8i16_v4i32 | uaddl2 v15.4s, v22.8h, v14.8h // UADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLPv2i32_v1i64 | uaddlp v15.1d, v5.2s // UADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv8i8v | uaddlv h24, v24.8b // UADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UADDLVv16i8v | uaddlv h19, v31.16b // UADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv4i16v | uaddlv s12, v24.4h // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv8i16v | uaddlv s30, v0.8h // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv4i32v | uaddlv d6, v19.4s // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_B | uaddv d9, p5, z1.b // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_H | uaddv d26, p0, z25.h // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_S | uaddv d4, p1, z1.s // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_D | uaddv d28, p6, z6.d // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDWv2i32_v2i64 | uaddw v17.2d, v9.2d, v12.2s // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDWv8i16_v4i32 | uaddw2 v15.4s, v13.4s, v4.8h // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | ubfiz w11, w6, #30, #1 // UBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | ubfiz x27, x15, #49, #9 // UBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsl w19, w16, #7 // UBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | ubfiz x4, x30, #5, #51 // UBFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | ubfx w13, w18, #25, #3 // UBFX <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsr x23, x26, #59 // UBFX <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field extract \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSWHri | ucvtf h8, w24, #16 // UCVTF <Hd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSWSri | ucvtf s7, w16, #29 // UCVTF <Sd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSWDri | ucvtf d5, w17, #23 // UCVTF <Dd>, <Wn>, #<sfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSXHri | ucvtf h13, x17, #12 // UCVTF <Hd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSXSri | ucvtf s25, x2, #37 // UCVTF <Sd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFSXDri | ucvtf d20, x11, #43 // UCVTF <Dd>, <Xn>, #<dfbits> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUWHri | ucvtf h30, w4 // UCVTF <Hd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUWSri | ucvtf s22, w8 // UCVTF <Sd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUWDri | ucvtf d8, w15 // UCVTF <Dd>, <Wn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXHri | ucvtf h17, x12 // UCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXSri | ucvtf s8, x0 // UCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXDri | ucvtf d22, x17 // UCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFh | ucvtf h22, h16, #11 // UCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFs | ucvtf s17, s18, #18 // UCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFd | ucvtf d19, d1, #2 // UCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4i16_shift | ucvtf v18.4h, v11.4h, #7 // UCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | UCVTFv8i16_shift | ucvtf v22.8h, v20.8h, #10 // UCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2i32_shift | ucvtf v16.2s, v17.2s, #11 // UCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4i32_shift | ucvtf v17.4s, v23.4s, #2 // UCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2i64_shift | ucvtf v18.2d, v20.2d, #60 // UCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i16 | ucvtf h7, h21 // UCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i32 | ucvtf s25, s7 // UCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i64 | ucvtf d30, d29 // UCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4f16 | ucvtf v9.4h, v25.4h // UCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | UCVTFv8f16 | ucvtf v24.8h, v31.8h // UCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2f32 | ucvtf v14.2s, v2.2s // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4f32 | ucvtf v20.4s, v0.4s // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2f64 | ucvtf v27.2d, v3.2d // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | UCVTF_ZPmZ_HtoH | ucvtf z31.h, p5/m, z30.h // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoH | ucvtf z23.h, p7/m, z9.s // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoS | ucvtf z1.s, p1/m, z10.s // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoD | ucvtf z24.d, p5/m, z9.s // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoH | ucvtf z30.h, p2/m, z24.d // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoS | ucvtf z9.s, p5/m, z9.d // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoD | ucvtf z18.d, p6/m, z19.d // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 12 | 12 | 0.08 | V1UnitI[12], V1UnitM[12], V1UnitM0[12] | UDIVWr | udiv w12, w17, w22 // UDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
+# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitI[20], V1UnitM[20], V1UnitM0[20] | UDIVXr | udiv x7, x2, x23 // UDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
+# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | UDIV_ZPmZ_S | udiv z30.s, p5/m, z30.s, z10.s // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | UDIV_ZPmZ_D | udiv z31.d, p5/m, z31.d, z29.d // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | UDIVR_ZPmZ_S | udivr z19.s, p4/m, z19.s, z8.s // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
+# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | UDIVR_ZPmZ_D | udivr z3.d, p5/m, z3.d, z8.d // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | UDOT_ZZZI_S | udot z0.s, z5.b, z4.b[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UDOT_ZZZI_D | udot z19.d, z1.h, z13.h[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | UDOT_ZZZ_S | udot z22.s, z29.b, z4.b // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UDOT_ZZZ_D | udot z9.d, z1.h, z11.h // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTlanev8i8 | udot v10.2s, v11.8b, v21.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTlanev16i8 | udot v7.4s, v21.16b, v6.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTv8i8 | udot v19.2s, v31.8b, v17.8b // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UHADDv8i16 | uhadd v10.8h, v7.8h, v7.8h // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UHSUBv4i16 | uhsub v12.4h, v16.4h, v28.4h // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UMADDLrrr | umaddl x9, w28, w9, x19 // UMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMAX_ZI_B | umax z8.b, z8.b, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMAX_ZPmZ_B | umax z27.b, p1/m, z27.b, z13.b // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMAXv16i8 | umax v7.16b, v11.16b, v7.16b // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMAXPv8i16 | umaxp v15.8h, v8.8h, v12.8h // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv8i8v | umaxv b19, v7.8b // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UMAXVv16i8v | umaxv b12, v10.16b // UMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv4i16v | umaxv h27, v5.4h // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv8i16v | umaxv h11, v22.8h // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv4i32v | umaxv s5, v25.4s // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_B | umaxv b9, p7, z19.b // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_H | umaxv h8, p7, z26.h // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_S | umaxv s15, p2, z28.s // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_D | umaxv d11, p4, z11.d // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMIN_ZI_S | umin z21.s, z21.s, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMIN_ZPmZ_S | umin z31.s, p2/m, z31.s, z4.s // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMINv16i8 | umin v0.16b, v26.16b, v2.16b // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMINPv4i32 | uminp v28.4s, v16.4s, v15.4s // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMINVv8i8v | uminv b23, v21.8b // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UMINVv16i8v | uminv b3, v10.16b // UMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMINVv4i16v | uminv h6, v22.4h // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMINVv8i16v | uminv h23, v3.8h // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMINVv4i32v | uminv s29, v19.4s // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_B | uminv b2, p5, z8.b // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_H | uminv h28, p0, z0.h // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_S | uminv s10, p1, z29.s // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_D | uminv d24, p5, z29.d // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i16_indexed | umlal v22.4s, v14.4h, v0.h[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv2i32_indexed | umlal v28.2d, v31.2s, v0.s[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv8i16_indexed | umlal2 v31.4s, v7.8h, v15.h[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i32_indexed | umlal2 v10.2d, v4.4s, v3.s[2] // UMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i16_v4i32 | umlal v29.4s, v20.4h, v30.4h // UMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i32_v2i64 | umlal2 v10.2d, v28.4s, v19.4s // UMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv4i16_indexed | umlsl v21.4s, v12.4h, v7.h[5] // UMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv2i32_indexed | umlsl v20.2d, v20.2s, v2.s[0] // UMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv8i16_indexed | umlsl2 v27.4s, v28.8h, v6.h[4] // UMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv4i32_indexed | umlsl2 v30.2d, v23.4s, v1.s[2] // UMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv2i32_v2i64 | umlsl v11.2d, v23.2s, v1.2s // UMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv16i8_v8i16 | umlsl2 v11.8h, v20.16b, v2.16b // UMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UMMLA | ummla v14.4s, v17.16b, v25.16b // UMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | UMSUBLrrr | umnegl x23, w5, w23 // UMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi8_idx0 | umov w6, v22.b[0] // UMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi8 | umov w29, v0.b[11] // UMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi16_idx0 | umov w10, v25.h[0] // UMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi16 | umov w6, v7.h[3] // UMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi32_idx0 | mov w8, v8.s[0] // UMOV <Wd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi32 | mov w20, v1.s[3] // UMOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi64_idx0 | mov x20, v11.d[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi64 | mov x29, v7.d[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UMSUBLrrr | umsubl x21, w16, w28, x6 // UMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_B | umulh z20.b, p4/m, z20.b, z6.b // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_H | umulh z30.h, p6/m, z30.h, z15.h // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_S | umulh z11.s, p7/m, z11.s, z8.s // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UMULH_ZPmZ_D | umulh z3.d, p3/m, z3.d, z2.d // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | UMULHrr | umulh x23, x22, x19 // UMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | UMADDLrrr | umull x5, w17, w23 // UMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i16_indexed | umull v27.4s, v1.4h, v8.h[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv2i32_indexed | umull v22.2d, v28.2s, v6.s[1] // UMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv8i16_indexed | umull2 v18.4s, v26.8h, v10.h[1] // UMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i32_indexed | umull2 v28.2d, v21.4s, v1.s[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i16_v4i32 | umull v23.4s, v26.4h, v19.4h // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv16i8_v8i16 | umull2 v11.8h, v29.16b, v29.16b // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_B | uqadd z18.b, z18.b, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_S | uqadd z2.s, z2.s, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_S | uqadd z24.s, z24.s, #56 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZZZ_H | uqadd z6.h, z28.h, z5.h // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQADDv1i32 | uqadd s0, s24, s30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQADDv2i64 | uqadd v14.2d, v22.2d, v20.2d // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w10 // UQDECB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w8, vl3 // UQDECB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w3, vl32 // UQDECB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_XPiI | uqdecb x8 // UQDECB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_XPiI | uqdecb x3, vl5 // UQDECB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_XPiI | uqdecb x22, mul3, mul #2 // UQDECB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_WPiI | uqdecd w11 // UQDECD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_WPiI | uqdecd w27, vl256 // UQDECD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_WPiI | uqdecd w6, vl32, mul #10 // UQDECD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x1 // UQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x12, vl8 // UQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x10, vl64, mul #10 // UQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECD_ZPiI | uqdecd z0.d // UQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECD_ZPiI | uqdecd z8.d, vl3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECD_ZPiI | uqdecd z27.d, vl16, mul #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w30 // UQDECH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w28, mul3 // UQDECH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w5, vl5, mul #8 // UQDECH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x2 // UQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x15, vl7 // UQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x17, vl256, mul #10 // UQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECH_ZPiI | uqdech z5.h // UQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECH_ZPiI | uqdech z16.h, vl128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECH_ZPiI | uqdech z27.h, vl128, mul #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECP_WP_H | uqdecp w19, p5.h // UQDECP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECP_XP_B | uqdecp x1, p1.b // UQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | UQDECP_ZP_S | uqdecp z20.s, p0.s // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w17 // UQDECW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w11, vl256 // UQDECW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w13, mul4, mul #13 // UQDECW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x7 // UQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x28, vl32 // UQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x0, vl256, mul #3 // UQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECW_ZPiI | uqdecw z29.s // UQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECW_ZPiI | uqdecw z22.s, vl2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECW_ZPiI | uqdecw z20.s, vl2, mul #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w2 // UQINCB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w21, vl128 // UQINCB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w0, all, mul #13 // UQINCB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_XPiI | uqincb x24 // UQINCB <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_XPiI | uqincb x18, vl7 // UQINCB <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_XPiI | uqincb x13, vl256, mul #13 // UQINCB <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_WPiI | uqincd w23 // UQINCD <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_WPiI | uqincd w27, vl4 // UQINCD <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_WPiI | uqincd w7, vl32, mul #16 // UQINCD <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x0 // UQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x29, mul4 // UQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x20, pow2, mul #3 // UQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCD_ZPiI | uqincd z29.d // UQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCD_ZPiI | uqincd z4.d, vl64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCD_ZPiI | uqincd z12.d, vl6, mul #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w4 // UQINCH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w23, mul3 // UQINCH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w27, vl7, mul #3 // UQINCH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x8 // UQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x13, mul3 // UQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x5, mul4, mul #9 // UQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCH_ZPiI | uqinch z21.h // UQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCH_ZPiI | uqinch z1.h, vl8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCH_ZPiI | uqinch z7.h, vl7, mul #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCP_WP_D | uqincp w4, p5.d // UQINCP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCP_XP_D | uqincp x13, p5.d // UQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | UQINCP_ZP_S | uqincp z1.s, p0.s // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w13 // UQINCW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w26, vl8 // UQINCW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w3, vl16, mul #13 // UQINCW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x26 // UQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x13, vl256 // UQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x29, vl7, mul #6 // UQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCW_ZPiI | uqincw z26.s // UQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCW_ZPiI | uqincw z31.s, vl5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCW_ZPiI | uqincw z12.s, vl7, mul #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHLv1i32 | uqrshl s17, s5, s8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHLv8i8 | uqrshl v25.8b, v13.8b, v23.8b // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNb | uqrshrn b12, h9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNh | uqrshrn h1, s28, #2 // UQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNs | uqrshrn s1, d4, #12 // UQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv8i8_shift | uqrshrn v17.8b, v24.8h, #4 // UQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv4i16_shift | uqrshrn v29.4h, v25.4s, #10 // UQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv2i32_shift | uqrshrn v16.2s, v0.2d, #10 // UQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv16i8_shift | uqrshrn2 v5.16b, v28.8h, #6 // UQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv8i16_shift | uqrshrn2 v28.8h, v22.4s, #15 // UQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv4i32_shift | uqrshrn2 v20.4s, v13.2d, #4 // UQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLb | uqshl b16, b25, #3 // UQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLh | uqshl h22, h27, #3 // UQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLs | uqshl s9, s5, #2 // UQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLd | uqshl d25, d1, #30 // UQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv16i8_shift | uqshl v25.16b, v0.16b, #7 // UQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv4i16_shift | uqshl v1.4h, v12.4h, #15 // UQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv2i32_shift | uqshl v23.2s, v4.2s, #17 // UQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv2i64_shift | uqshl v28.2d, v23.2d, #48 // UQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv1i8 | uqshl b22, b26, b2 // UQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv4i16 | uqshl v8.4h, v17.4h, v13.4h // UQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNb | uqshrn b16, h27, #6 // UQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNh | uqshrn h4, s2, #15 // UQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNs | uqshrn s0, d15, #22 // UQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv8i8_shift | uqshrn v19.8b, v26.8h, #3 // UQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv4i16_shift | uqshrn v31.4h, v17.4s, #8 // UQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv2i32_shift | uqshrn v1.2s, v11.2d, #9 // UQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv16i8_shift | uqshrn2 v23.16b, v16.8h, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv8i16_shift | uqshrn2 v1.8h, v12.4s, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv4i32_shift | uqshrn2 v30.4s, v29.2d, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_B | uqsub z26.b, z26.b, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_S | uqsub z19.s, z19.s, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_H | uqsub z15.h, z15.h, #26624 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZZZ_D | uqsub z25.d, z13.d, z19.d // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSUBv1i32 | uqsub s16, s21, s6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSUBv4i32 | uqsub v19.4s, v0.4s, v5.4s // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv1i32 | uqxtn s3, d27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv2i32 | uqxtn v26.2s, v5.2d // UQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv16i8 | uqxtn2 v15.16b, v22.8h // UQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | URECPEv2i32 | urecpe v10.2s, v8.2s // URECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | URECPEv4i32 | urecpe v1.4s, v23.4s // URECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | URHADDv2i32 | urhadd v16.2s, v19.2s, v2.2s // URHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHLv1i64 | urshl d24, d22, d29 // URSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHLv8i8 | urshl v31.8b, v5.8b, v3.8b // URSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRd | urshr d23, d19, #62 // URSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv16i8_shift | urshr v23.16b, v14.16b, #2 // URSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv4i16_shift | urshr v16.4h, v13.4h, #7 // URSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv4i32_shift | urshr v10.4s, v10.4s, #21 // URSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv2i64_shift | urshr v2.2d, v16.2d, #30 // URSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | URSQRTEv2i32 | ursqrte v15.2s, v20.2s // URSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | URSQRTEv4i32 | ursqrte v31.4s, v14.4s // URSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAd | ursra d24, d24, #48 // URSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv8i8_shift | ursra v14.8b, v18.8b, #1 // URSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv4i16_shift | ursra v9.4h, v9.4h, #16 // URSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv2i32_shift | ursra v25.2s, v17.2s, #9 // URSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv2i64_shift | ursra v17.2d, v16.2d, #61 // URSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOTlanev8i8 | usdot v0.2s, v18.8b, v10.4b[3] // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV[2] | USDOT_ZZZI | usdot z5.s, z25.b, z2.b[1] // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOTv8i8 | usdot v17.2s, v0.8b, v29.8b // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV[2] | USDOT_ZZZ | usdot z8.s, z6.b, z18.b // USDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLv1i64 | ushl d7, d17, d3 // USHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLv8i8 | ushl v6.8b, v26.8b, v6.8b // USHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv8i8_shift | ushll v18.8h, v24.8b, #4 // USHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v12.4s, v10.4h, #3 // USHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv2i32_shift | ushll v16.2d, v16.2s, #31 // USHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #3 // USHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv8i16_shift | ushll2 v18.4s, v22.8h, #13 // USHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i32_shift | ushll2 v31.2d, v12.4s, #11 // USHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRd | ushr d23, d22, #58 // USHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv8i8_shift | ushr v24.8b, v0.8b, #2 // USHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv8i16_shift | ushr v21.8h, v31.8h, #11 // USHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv2i32_shift | ushr v27.2s, v24.2s, #14 // USHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv2i64_shift | ushr v0.2d, v27.2d, #48 // USHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USMMLA | usmmla v25.4s, v10.16b, v11.16b // USMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USQADDv1i16 | usqadd h14, h13 // USQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USQADDv2i64 | usqadd v18.2d, v23.2d // USQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAd | usra d22, d24, #9 // USRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv16i8_shift | usra v16.16b, v5.16b, #5 // USRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv4i16_shift | usra v18.4h, v22.4h, #11 // USRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv2i32_shift | usra v13.2s, v12.2s, #24 // USRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv2i64_shift | usra v30.2d, v30.2d, #41 // USRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBLv4i16_v4i32 | usubl v22.4s, v18.4h, v3.4h // USUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBLv16i8_v8i16 | usubl2 v12.8h, v23.16b, v15.16b // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBWv8i8_v8i16 | usubw v30.8h, v12.8h, v20.8b // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBWv8i16_v4i32 | usubw2 v2.4s, v0.4s, v30.8h // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UUNPKHI_ZZ_D | uunpkhi z26.d, z26.s // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UUNPKLO_ZZ_S | uunpklo z10.s, z11.h // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | uxtb w2, w23 // UXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTB_ZPmZ_D | uxtb z1.d, p2/m, z11.d // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTH_ZPmZ_S | uxth z6.s, p3/m, z18.s // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTW_ZPmZ_D | uxtw z23.d, p4/m, z3.d // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | uxth w7, w14 // UXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v1.4s, v22.4h, #0 // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #0 // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UZP1v2i32 | uzp1 v9.2s, v29.2s, v20.2s // UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UZP1_PPP_D | uzp1 p5.d, p3.d, p5.d // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UZP2_PPP_S | uzp2 p6.s, p0.s, p6.s // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UZP2v4i32 | uzp2 v18.4s, v12.4s, v31.4s // UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | wfe // WFE \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | wfi // WFI \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELE_PXX_H | whilele p6.h, x28, x30 // WHILELE <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELO_PXX_B | whilelo p3.b, x9, x7 // WHILELO <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELS_PWW_B | whilels p4.b, w4, w20 // WHILELS <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELT_PXX_S | whilelt p7.s, x20, x6 // WHILELT <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | WRFFR | wrffr p7.b // WRFFR <Pn>.B \\ Write to first fault register \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | XTNv8i8 | xtn v20.8b, v17.8h // XTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | XTNv16i8 | xtn2 v31.16b, v26.8h // XTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | yield // YIELD \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ZIP1v2i64 | zip1 v21.2d, v4.2d, v11.2d // ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ZIP1_PPP_D | zip1 p0.d, p1.d, p4.d // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ZIP2_PPP_S | zip2 p3.s, p5.s, p4.s // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ZIP2v4i32 | zip2 v2.4s, v20.4s, v5.4s // ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
index 6b7616d68d4cd07..bc336ab90e8b462 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
@@ -2506,11 +2506,11 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 and z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0x6
# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0xf9
-# CHECK-NEXT: 2 2 2.00 ands p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 4 12 2.00 andv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 andv d0, p7, z31.d
-# CHECK-NEXT: 4 12 2.00 andv h0, p7, z31.h
-# CHECK-NEXT: 4 12 2.00 andv s0, p7, z31.s
+# CHECK-NEXT: 1 2 2.00 ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 12 2.00 andv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 andv d0, p7, z31.d
+# CHECK-NEXT: 1 12 2.00 andv h0, p7, z31.h
+# CHECK-NEXT: 1 12 2.00 andv s0, p7, z31.s
# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, #1
# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, z0.b
# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, z1.d
@@ -2573,26 +2573,26 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 bic z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 bic z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 bic z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 2 2 2.00 bics p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 2.00 bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 2.00 bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 2.00 bics p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 brka p0.b, p15/m, p15.b
# CHECK-NEXT: 1 2 1.00 brka p0.b, p15/z, p15.b
-# CHECK-NEXT: 2 3 2.00 brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 2.00 brkas p0.b, p15/z, p15.b
# CHECK-NEXT: 1 2 1.00 brkb p0.b, p15/m, p15.b
# CHECK-NEXT: 1 2 1.00 brkb p0.b, p15/z, p15.b
-# CHECK-NEXT: 2 3 2.00 brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 3 2.00 brkbs p0.b, p15/z, p15.b
# CHECK-NEXT: 1 2 1.00 brkn p0.b, p15/z, p1.b, p0.b
# CHECK-NEXT: 1 2 1.00 brkn p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 3 2.00 brkns p0.b, p15/z, p1.b, p0.b
-# CHECK-NEXT: 2 3 2.00 brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 2.00 brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 1 3 2.00 brkns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 brkpa p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 2 1.00 brkpa p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 3 2.00 brkpas p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT: 2 3 2.00 brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 2.00 brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 2.00 brkpas p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 brkpb p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 2 1.00 brkpb p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 3 2.00 brkpbs p0.b, p15/z, p1.b, p2.b
-# CHECK-NEXT: 2 3 2.00 brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 3 2.00 brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 2.00 brkpbs p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 1.00 clasta b0, p7, b0, z31.b
# CHECK-NEXT: 1 3 1.00 clasta d0, p7, d0, z31.d
# CHECK-NEXT: 1 3 1.00 clasta h0, p7, h0, z31.h
@@ -2840,9 +2840,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 decp xzr, p15.d
# CHECK-NEXT: 1 2 1.00 decp xzr, p15.h
# CHECK-NEXT: 1 2 1.00 decp xzr, p15.s
-# CHECK-NEXT: 3 7 2.00 decp z31.d, p15.d
-# CHECK-NEXT: 3 7 2.00 decp z31.h, p15.h
-# CHECK-NEXT: 3 7 2.00 decp z31.s, p15.s
+# CHECK-NEXT: 2 7 2.00 decp z31.d, p15.d
+# CHECK-NEXT: 2 7 2.00 decp z31.h, p15.h
+# CHECK-NEXT: 2 7 2.00 decp z31.s, p15.s
# CHECK-NEXT: 1 2 1.00 decw x0
# CHECK-NEXT: 1 2 1.00 decw x0, #14
# CHECK-NEXT: 1 2 1.00 decw x0, all, mul #16
@@ -2875,11 +2875,11 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 eor z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 eor z5.b, z5.b, #0x6
# CHECK-NEXT: 1 2 0.50 eor z5.b, z5.b, #0xf9
-# CHECK-NEXT: 2 2 2.00 eors p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 4 12 2.00 eorv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 eorv d0, p7, z31.d
-# CHECK-NEXT: 4 12 2.00 eorv h0, p7, z31.h
-# CHECK-NEXT: 4 12 2.00 eorv s0, p7, z31.s
+# CHECK-NEXT: 1 2 2.00 eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 12 2.00 eorv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 eorv d0, p7, z31.d
+# CHECK-NEXT: 1 12 2.00 eorv h0, p7, z31.h
+# CHECK-NEXT: 1 12 2.00 eorv s0, p7, z31.s
# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #255
# CHECK-NEXT: 1 2 0.50 fabd z0.d, p7/m, z0.d, z31.d
@@ -2912,12 +2912,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 fadd z31.d, p7/m, z31.d, #1.0
# CHECK-NEXT: 1 2 0.50 fadd z31.h, p7/m, z31.h, #1.0
# CHECK-NEXT: 1 2 0.50 fadd z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 3 8 1.50 fadda d0, p7, d0, z31.d
-# CHECK-NEXT: 18 19 18.00 fadda h0, p7, h0, z31.h
-# CHECK-NEXT: 10 11 10.00 fadda s0, p7, s0, z31.s
-# CHECK-NEXT: 5 9 2.00 faddv d0, p7, z31.d
-# CHECK-NEXT: 6 13 3.00 faddv h0, p7, z31.h
-# CHECK-NEXT: 6 11 2.50 faddv s0, p7, z31.s
+# CHECK-NEXT: 1 8 1.50 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: 1 19 18.00 fadda h0, p7, h0, z31.h
+# CHECK-NEXT: 1 11 10.00 fadda s0, p7, s0, z31.s
+# CHECK-NEXT: 1 9 2.00 faddv d0, p7, z31.d
+# CHECK-NEXT: 1 13 3.00 faddv h0, p7, z31.h
+# CHECK-NEXT: 1 11 2.50 faddv s0, p7, z31.s
# CHECK-NEXT: 1 3 0.50 fcadd z0.d, p0/m, z0.d, z0.d, #90
# CHECK-NEXT: 1 3 0.50 fcadd z0.h, p0/m, z0.h, z0.h, #90
# CHECK-NEXT: 1 3 0.50 fcadd z0.s, p0/m, z0.s, z0.s, #90
@@ -2982,29 +2982,29 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 1.00 fcvt z0.d, p0/m, z0.h
# CHECK-NEXT: 1 3 1.00 fcvt z0.d, p0/m, z0.s
# CHECK-NEXT: 1 3 1.00 fcvt z0.h, p0/m, z0.d
-# CHECK-NEXT: 2 4 2.00 fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 4 2.00 fcvt z0.h, p0/m, z0.s
# CHECK-NEXT: 1 3 1.00 fcvt z0.s, p0/m, z0.d
-# CHECK-NEXT: 2 4 2.00 fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 fcvt z0.s, p0/m, z0.h
# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.h
-# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.s
-# CHECK-NEXT: 4 6 4.00 fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 6 4.00 fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 6 4.00 fcvtzs z0.h, p0/m, z0.h
# CHECK-NEXT: 1 3 1.00 fcvtzs z0.s, p0/m, z0.d
-# CHECK-NEXT: 2 4 2.00 fcvtzs z0.s, p0/m, z0.h
-# CHECK-NEXT: 2 4 2.00 fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 6 4.00 fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 fcvtzs z0.s, p0/m, z0.s
# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.h
-# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.s
-# CHECK-NEXT: 4 6 4.00 fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 6 4.00 fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 6 4.00 fcvtzu z0.h, p0/m, z0.h
# CHECK-NEXT: 1 3 1.00 fcvtzu z0.s, p0/m, z0.d
-# CHECK-NEXT: 2 4 2.00 fcvtzu z0.s, p0/m, z0.h
-# CHECK-NEXT: 2 4 2.00 fcvtzu z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 15 7.00 fdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 13 10.00 fdiv z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 10 7.00 fdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 15 7.00 fdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 13 10.00 fdivr z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 10 7.00 fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 6 4.00 fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 15 14.00 fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 13 12.00 fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 10 9.00 fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 15 14.00 fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 13 12.00 fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 10 9.00 fdivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 3 0.50 fexpa z0.d, z31.d
# CHECK-NEXT: 1 3 0.50 fexpa z0.h, z31.h
# CHECK-NEXT: 1 3 0.50 fexpa z0.s, z31.s
@@ -3029,12 +3029,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 fmaxnm z31.d, p7/m, z31.d, #1.0
# CHECK-NEXT: 1 2 0.50 fmaxnm z31.h, p7/m, z31.h, #1.0
# CHECK-NEXT: 1 2 0.50 fmaxnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 5 9 2.00 fmaxnmv d0, p7, z31.d
-# CHECK-NEXT: 6 13 3.00 fmaxnmv h0, p7, z31.h
-# CHECK-NEXT: 6 11 2.50 fmaxnmv s0, p7, z31.s
-# CHECK-NEXT: 5 9 2.00 fmaxv d0, p7, z31.d
-# CHECK-NEXT: 6 13 3.00 fmaxv h0, p7, z31.h
-# CHECK-NEXT: 6 11 2.50 fmaxv s0, p7, z31.s
+# CHECK-NEXT: 1 9 2.00 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: 1 13 3.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: 1 11 2.50 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: 1 9 2.00 fmaxv d0, p7, z31.d
+# CHECK-NEXT: 1 13 3.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: 1 11 2.50 fmaxv s0, p7, z31.s
# CHECK-NEXT: 1 2 0.50 fmin z0.d, p0/m, z0.d, #0.0
# CHECK-NEXT: 1 2 0.50 fmin z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 2 0.50 fmin z0.h, p0/m, z0.h, #0.0
@@ -3053,12 +3053,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 fminnm z31.d, p7/m, z31.d, #1.0
# CHECK-NEXT: 1 2 0.50 fminnm z31.h, p7/m, z31.h, #1.0
# CHECK-NEXT: 1 2 0.50 fminnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 5 9 2.00 fminnmv d0, p7, z31.d
-# CHECK-NEXT: 6 13 3.00 fminnmv h0, p7, z31.h
-# CHECK-NEXT: 6 11 2.50 fminnmv s0, p7, z31.s
-# CHECK-NEXT: 5 9 2.00 fminv d0, p7, z31.d
-# CHECK-NEXT: 6 13 3.00 fminv h0, p7, z31.h
-# CHECK-NEXT: 6 11 2.50 fminv s0, p7, z31.s
+# CHECK-NEXT: 1 9 2.00 fminnmv d0, p7, z31.d
+# CHECK-NEXT: 1 13 3.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: 1 11 2.50 fminnmv s0, p7, z31.s
+# CHECK-NEXT: 1 9 2.00 fminv d0, p7, z31.d
+# CHECK-NEXT: 1 13 3.00 fminv h0, p7, z31.h
+# CHECK-NEXT: 1 11 2.50 fminv s0, p7, z31.s
# CHECK-NEXT: 1 4 0.50 fmla z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 4 0.50 fmla z0.d, z1.d, z7.d[1]
# CHECK-NEXT: 1 4 0.50 fmla z0.h, p7/m, z1.h, z31.h
@@ -3119,8 +3119,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 fnmsb z0.h, p7/m, z1.h, z31.h
# CHECK-NEXT: 1 4 0.50 fnmsb z0.s, p7/m, z1.s, z31.s
# CHECK-NEXT: 1 3 1.00 frecpe z0.d, z31.d
-# CHECK-NEXT: 4 6 4.00 frecpe z0.h, z31.h
-# CHECK-NEXT: 2 4 2.00 frecpe z0.s, z31.s
+# CHECK-NEXT: 1 6 1.00 frecpe z0.h, z31.h
+# CHECK-NEXT: 1 4 1.00 frecpe z0.s, z31.s
# CHECK-NEXT: 1 4 0.50 frecps z0.d, z1.d, z31.d
# CHECK-NEXT: 1 4 0.50 frecps z0.h, z1.h, z31.h
# CHECK-NEXT: 1 4 0.50 frecps z0.s, z1.s, z31.s
@@ -3149,17 +3149,17 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 6 1.00 frintz z31.h, p7/m, z31.h
# CHECK-NEXT: 1 4 1.00 frintz z31.s, p7/m, z31.s
# CHECK-NEXT: 1 3 1.00 frsqrte z0.d, z31.d
-# CHECK-NEXT: 4 6 4.00 frsqrte z0.h, z31.h
-# CHECK-NEXT: 2 4 2.00 frsqrte z0.s, z31.s
+# CHECK-NEXT: 1 6 1.00 frsqrte z0.h, z31.h
+# CHECK-NEXT: 1 4 1.00 frsqrte z0.s, z31.s
# CHECK-NEXT: 1 4 0.50 frsqrts z0.d, z1.d, z31.d
# CHECK-NEXT: 1 4 0.50 frsqrts z0.h, z1.h, z31.h
# CHECK-NEXT: 1 4 0.50 frsqrts z0.s, z1.s, z31.s
# CHECK-NEXT: 1 3 0.50 fscale z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 3 0.50 fscale z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 3 0.50 fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 16 7.00 fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 13 10.00 fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 10 7.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 16 14.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 13 12.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 10 9.00 fsqrt z31.s, p7/m, z31.s
# CHECK-NEXT: 1 2 0.50 fsub z0.d, p0/m, z0.d, #0.5
# CHECK-NEXT: 1 2 0.50 fsub z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 2 0.50 fsub z0.d, z1.d, z31.d
@@ -3200,15 +3200,15 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 incd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 incd x0, pow2
# CHECK-NEXT: 1 2 1.00 incd x0, vl1
-# CHECK-NEXT: 1 2 1.00 incd z0.d
-# CHECK-NEXT: 1 2 1.00 incd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incd z0.d
+# CHECK-NEXT: 1 2 0.50 incd z0.d, all, mul #16
# CHECK-NEXT: 1 2 1.00 inch x0
# CHECK-NEXT: 1 2 1.00 inch x0, #14
# CHECK-NEXT: 1 2 1.00 inch x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 inch x0, pow2
# CHECK-NEXT: 1 2 1.00 inch x0, vl1
-# CHECK-NEXT: 1 2 1.00 inch z0.h
-# CHECK-NEXT: 1 2 1.00 inch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 inch z0.h
+# CHECK-NEXT: 1 2 0.50 inch z0.h, all, mul #16
# CHECK-NEXT: 1 2 1.00 incp x0, p0.b
# CHECK-NEXT: 1 2 1.00 incp x0, p0.d
# CHECK-NEXT: 1 2 1.00 incp x0, p0.h
@@ -3217,28 +3217,28 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 incp xzr, p15.d
# CHECK-NEXT: 1 2 1.00 incp xzr, p15.h
# CHECK-NEXT: 1 2 1.00 incp xzr, p15.s
-# CHECK-NEXT: 3 7 2.00 incp z31.d, p15.d
-# CHECK-NEXT: 3 7 2.00 incp z31.h, p15.h
-# CHECK-NEXT: 3 7 2.00 incp z31.s, p15.s
+# CHECK-NEXT: 2 7 2.00 incp z31.d, p15.d
+# CHECK-NEXT: 2 7 2.00 incp z31.h, p15.h
+# CHECK-NEXT: 2 7 2.00 incp z31.s, p15.s
# CHECK-NEXT: 1 2 1.00 incw x0
# CHECK-NEXT: 1 2 1.00 incw x0, #14
# CHECK-NEXT: 1 2 1.00 incw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 incw x0, pow2
# CHECK-NEXT: 1 2 1.00 incw x0, vl1
-# CHECK-NEXT: 1 2 1.00 incw z0.s
-# CHECK-NEXT: 1 2 1.00 incw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incw z0.s
+# CHECK-NEXT: 1 2 0.50 incw z0.s, all, mul #16
# CHECK-NEXT: 1 4 1.00 index z0.b, #0, #0
-# CHECK-NEXT: 2 5 2.00 index z0.d, #0, #0
+# CHECK-NEXT: 1 5 2.00 index z0.d, #0, #0
# CHECK-NEXT: 1 4 1.00 index z0.h, #0, #0
# CHECK-NEXT: 2 7 1.00 index z0.h, w0, w0
# CHECK-NEXT: 1 4 1.00 index z0.s, #0, #0
# CHECK-NEXT: 2 7 1.00 index z21.b, w10, w21
-# CHECK-NEXT: 4 8 2.00 index z21.d, x10, x21
+# CHECK-NEXT: 2 8 2.00 index z21.d, x10, x21
# CHECK-NEXT: 2 7 1.00 index z21.s, w10, w21
# CHECK-NEXT: 2 7 1.00 index z23.b, #13, w8
# CHECK-NEXT: 2 7 1.00 index z23.b, w13, #8
-# CHECK-NEXT: 4 8 2.00 index z23.d, #13, x8
-# CHECK-NEXT: 4 8 2.00 index z23.d, x13, #8
+# CHECK-NEXT: 2 8 2.00 index z23.d, #13, x8
+# CHECK-NEXT: 2 8 2.00 index z23.d, x13, #8
# CHECK-NEXT: 2 7 1.00 index z23.h, #13, w8
# CHECK-NEXT: 2 7 1.00 index z23.h, w13, #8
# CHECK-NEXT: 2 7 1.00 index z23.s, #13, w8
@@ -3247,10 +3247,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 7 1.00 index z31.b, #-1, wzr
# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, #-1
# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, wzr
-# CHECK-NEXT: 2 5 2.00 index z31.d, #-1, #-1
-# CHECK-NEXT: 4 8 2.00 index z31.d, #-1, xzr
-# CHECK-NEXT: 4 8 2.00 index z31.d, xzr, #-1
-# CHECK-NEXT: 4 8 2.00 index z31.d, xzr, xzr
+# CHECK-NEXT: 1 5 2.00 index z31.d, #-1, #-1
+# CHECK-NEXT: 2 8 2.00 index z31.d, #-1, xzr
+# CHECK-NEXT: 2 8 2.00 index z31.d, xzr, #-1
+# CHECK-NEXT: 2 8 2.00 index z31.d, xzr, xzr
# CHECK-NEXT: 1 4 1.00 index z31.h, #-1, #-1
# CHECK-NEXT: 2 7 1.00 index z31.h, #-1, wzr
# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, #-1
@@ -3291,66 +3291,66 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [x0, x0]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [x0]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.33 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 0.33 * ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 11 4.00 * ld1b { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: 1 6 0.50 * ld1b { z21.b }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1b { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1b { z21.h }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1b { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1b { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: 1 6 0.50 * ld1b { z23.d }, p3/z, [x13, x8]
# CHECK-NEXT: 1 6 0.50 * ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1b { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1b { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: 1 6 0.50 * ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 11 0.33 * ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 11 4.00 * ld1b { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: 1 6 0.50 * ld1b { z5.h }, p3/z, [x17, x16]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 2 11 4.00 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 2 11 4.00 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
# CHECK-NEXT: 1 6 0.50 * ld1d { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * ld1d { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 6 0.50 * ld1d { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
# CHECK-NEXT: 1 6 0.50 * ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 2 11 4.00 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
# CHECK-NEXT: 1 6 0.50 * ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1d { z31.d }, p7/z, [z31.d, #248]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.33 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 0.33 * ld1h { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 2 9 2.00 * ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 2 7 0.50 * ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: 2 7 0.50 * ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1h { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 1 6 0.50 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 11 0.67 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 4 11 0.67 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: 2 11 0.33 * ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 7 0.50 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1h { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: 2 7 0.50 * ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
# CHECK-NEXT: 2 7 0.50 * ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
# CHECK-NEXT: 1 6 0.50 * ld1rb { z0.b }, p0/z, [x0]
@@ -3406,252 +3406,252 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 6 0.50 * ld1rw { z31.d }, p7/z, [sp, #252]
# CHECK-NEXT: 1 6 0.50 * ld1rw { z31.s }, p7/z, [sp, #252]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * ld1sb { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.h }, p0/z, [sp, x0]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.h }, p0/z, [x0, x0]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.h }, p0/z, [x0]
-# CHECK-NEXT: 2 9 0.33 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 0.33 * ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 11 4.00 * ld1sb { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z23.d }, p3/z, [x13, x8]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1sb { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 9 2.00 * ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1sb { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 11 0.33 * ld1sb { z31.s }, p7/z, [z31.s, #31]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 2 9 0.33 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 0.33 * ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 2 7 0.50 * ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 2 7 0.50 * ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: 2 7 0.50 * ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1sh { z31.d }, p7/z, [z31.d, #62]
# CHECK-NEXT: 1 6 0.50 * ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 11 0.67 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 4 11 0.67 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: 2 11 0.33 * ld1sh { z31.s }, p7/z, [z31.s, #62]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 11 4.00 * ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 11 4.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 2 11 4.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * ld1sw { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 2 11 4.00 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1sw { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 2 9 2.00 * ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
# CHECK-NEXT: 1 6 0.50 * ld1w { z0.d }, p0/z, [x0]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 2 9 0.33 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1w { z0.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 0.33 * ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: 1 6 0.50 * ld1w { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 1 6 0.50 * ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1w { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: 1 6 0.50 * ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
# CHECK-NEXT: 1 6 0.50 * ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: 4 9 0.67 * ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * ld1w { z31.d }, p7/z, [z31.d, #124]
# CHECK-NEXT: 1 6 0.50 * ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 11 0.67 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT: 4 11 0.67 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
-# CHECK-NEXT: 2 11 0.33 * ld1w { z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT: 4 9 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 8 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT: 4 8 1.00 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 4 8 1.00 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 4 9 1.00 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 4 9 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 8 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT: 4 8 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 4 8 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 4 9 1.00 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 4 10 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 8 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT: 4 8 1.00 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 4 8 1.00 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 4 10 1.00 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 4 9 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 8 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT: 4 8 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 4 8 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 4 9 1.00 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 7 8 1.50 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 6 11 1.50 * ld3b { z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT: 6 11 1.50 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 6 11 1.50 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 7 8 1.50 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 7 8 1.50 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 6 11 1.50 * ld3d { z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT: 6 11 1.50 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 6 11 1.50 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 7 8 1.50 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 7 8 1.50 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 6 11 1.50 * ld3h { z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT: 6 11 1.50 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 6 11 1.50 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 7 8 1.50 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 7 8 1.50 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 6 11 1.50 * ld3w { z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT: 6 11 1.50 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 6 11 1.50 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 7 8 1.50 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 10 13 2.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 8 12 2.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT: 8 12 2.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 8 12 2.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 10 13 2.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 10 13 2.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 8 12 2.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT: 8 12 2.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 8 12 2.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 10 13 2.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 10 13 2.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 8 12 2.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT: 8 12 2.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 8 12 2.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 10 13 2.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 10 13 2.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 8 12 2.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT: 8 12 2.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 8 12 2.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 10 13 2.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 2 11 4.00 * ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 2 9 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 8 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: 2 8 1.00 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 1.00 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 1.00 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 2 9 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 8 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: 2 8 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 1.00 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 10 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 8 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: 2 8 1.00 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 1.00 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 10 1.00 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 2 9 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 8 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: 2 8 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 2 8 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 9 1.00 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 13 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 11 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 2 11 3.00 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 11 3.00 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 13 3.00 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 3 13 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 11 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 2 11 3.00 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 11 3.00 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 13 3.00 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 13 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 11 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 2 11 3.00 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 11 3.00 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 13 3.00 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 13 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 11 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 2 11 3.00 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 2 11 3.00 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 13 3.00 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 13 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 12 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 2 12 4.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 2 12 4.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 13 4.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 3 13 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 12 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 2 12 4.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 2 12 4.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 13 4.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 13 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 12 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 2 12 4.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 2 12 4.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 13 4.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 13 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 12 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 2 12 4.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 2 12 4.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 13 4.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 9 0.67 * U ldff1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.s }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 9 0.33 * U ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * U ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 2 11 0.33 * U ldff1b { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 4 9 0.67 * U ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z31.b }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 9 2.00 * U ldff1b { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z31.h }, p7/z, [sp]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z31.s }, p7/z, [sp]
-# CHECK-NEXT: 2 11 0.33 * U ldff1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 11 4.00 * U ldff1b { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: 2 6 0.50 * U ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 11 4.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 2 11 4.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 2 9 2.00 * U ldff1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 2 9 2.00 * U ldff1d { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1d { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 2 9 2.00 * U ldff1d { z31.d }, p7/z, [z31.d, #248]
# CHECK-NEXT: 2 7 0.50 * U ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 7 0.50 * U ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
# CHECK-NEXT: 2 7 0.50 * U ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 9 0.33 * U ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * U ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 2 11 0.33 * U ldff1h { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 7 0.50 * U ldff1h { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 2 9 2.00 * U ldff1h { z31.d }, p7/z, [z31.d, #62]
# CHECK-NEXT: 2 7 0.50 * U ldff1h { z31.h }, p7/z, [sp]
-# CHECK-NEXT: 4 11 0.67 * U ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 4 11 0.67 * U ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
# CHECK-NEXT: 2 7 0.50 * U ldff1h { z31.s }, p7/z, [sp]
-# CHECK-NEXT: 2 11 0.33 * U ldff1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 11 4.00 * U ldff1h { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: 2 6 0.50 * U ldff1sb { z0.d }, p0/z, [x0, x0]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1sb { z0.h }, p0/z, [x0, x0]
# CHECK-NEXT: 2 6 0.50 * U ldff1sb { z0.s }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 9 0.33 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 2 11 0.33 * U ldff1sb { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1sb { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sb { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: 2 6 0.50 * U ldff1sb { z31.h }, p7/z, [sp]
# CHECK-NEXT: 2 6 0.50 * U ldff1sb { z31.s }, p7/z, [sp]
-# CHECK-NEXT: 2 11 0.33 * U ldff1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sb { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: 2 7 0.50 * U ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 7 0.50 * U ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 9 0.33 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 2 11 0.33 * U ldff1sh { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 7 0.50 * U ldff1sh { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sh { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: 4 11 0.67 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: 4 11 0.67 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
# CHECK-NEXT: 2 7 0.50 * U ldff1sh { z31.s }, p7/z, [sp]
-# CHECK-NEXT: 2 11 0.33 * U ldff1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sh { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: 2 6 0.50 * U ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sw { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1sw { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 2 9 2.00 * U ldff1sw { z31.d }, p7/z, [z31.d, #124]
# CHECK-NEXT: 2 6 0.50 * U ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 9 0.33 * U ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: 2 9 0.33 * U ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: 2 11 0.33 * U ldff1w { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1w { z31.d }, p7/z, [sp]
-# CHECK-NEXT: 4 9 0.67 * U ldff1w { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: 4 11 0.67 * U ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT: 4 11 0.67 * U ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 2 9 2.00 * U ldff1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
# CHECK-NEXT: 2 6 0.50 * U ldff1w { z31.s }, p7/z, [sp]
-# CHECK-NEXT: 2 11 0.33 * U ldff1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 2 11 4.00 * U ldff1w { z31.s }, p7/z, [z31.s, #124]
# CHECK-NEXT: 1 6 0.50 * U ldnf1b { z0.b }, p0/z, [x0]
# CHECK-NEXT: 1 6 0.50 * U ldnf1b { z0.d }, p0/z, [x0]
# CHECK-NEXT: 1 6 0.50 * U ldnf1b { z0.h }, p0/z, [x0]
@@ -3782,9 +3782,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 lsrr z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1 2 1.00 lsrr z0.h, p0/m, z0.h, z0.h
# CHECK-NEXT: 1 2 1.00 lsrr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 2 5 2.00 mad z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 2 5 2.00 mla z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 2 5 2.00 mls z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 5 2.00 mad z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 5 2.00 mla z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 5 2.00 mls z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1 1 1.00 mov p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 mov p0.b, p0/m, p0.b
# CHECK-NEXT: 1 1 1.00 mov p0.b, p0/z, p0.b
@@ -3888,36 +3888,36 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 mov z5.h, #-6
# CHECK-NEXT: 1 2 0.50 mov z5.q, z17.q[3]
# CHECK-NEXT: 1 2 0.50 mov z5.s, #-6
-# CHECK-NEXT: 2 2 2.00 movs p0.b, p0.b
-# CHECK-NEXT: 2 2 2.00 movs p0.b, p0/z, p0.b
-# CHECK-NEXT: 2 2 2.00 movs p15.b, p15.b
-# CHECK-NEXT: 2 2 2.00 movs p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 2.00 movs p0.b, p0.b
+# CHECK-NEXT: 1 2 2.00 movs p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 2.00 movs p15.b, p15.b
+# CHECK-NEXT: 1 2 2.00 movs p15.b, p15/z, p15.b
# CHECK-NEXT: 1 1 0.07 U mrs x3, ID_AA64ZFR0_EL1
# CHECK-NEXT: 1 1 0.07 U mrs x3, ZCR_EL1
# CHECK-NEXT: 1 1 0.07 U mrs x3, ZCR_EL12
# CHECK-NEXT: 1 1 0.07 U mrs x3, ZCR_EL2
# CHECK-NEXT: 1 1 0.07 U mrs x3, ZCR_EL3
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL1, x3
-# CHECK-NEXT: 2 5 2.00 msb z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 5 2.00 msb z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL12, x3
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL2, x3
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL3, x3
# CHECK-NEXT: 1 4 1.00 mul z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: 2 5 2.00 mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 5 2.00 mul z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 4 1.00 mul z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 4 1.00 mul z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #-128
# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #127
-# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, #-128
-# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, #127
+# CHECK-NEXT: 1 5 2.00 mul z31.d, z31.d, #-128
+# CHECK-NEXT: 1 5 2.00 mul z31.d, z31.d, #127
# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #-128
# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #127
# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #-128
# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #127
# CHECK-NEXT: 1 1 1.00 nand p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 nand p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 2 2.00 nands p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 2.00 nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 2.00 nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 2.00 nands p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 0.50 neg z0.b, p0/m, z0.b
# CHECK-NEXT: 1 2 0.50 neg z0.d, p0/m, z0.d
# CHECK-NEXT: 1 2 0.50 neg z0.h, p0/m, z0.h
@@ -3928,20 +3928,20 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 neg z31.s, p7/m, z31.s
# CHECK-NEXT: 1 1 1.00 nor p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 nor p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 2 2.00 nors p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 2.00 nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 2.00 nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 2.00 nors p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 not p0.b, p0/z, p0.b
# CHECK-NEXT: 1 1 1.00 not p15.b, p15/z, p15.b
# CHECK-NEXT: 1 2 0.50 not z31.b, p7/m, z31.b
# CHECK-NEXT: 1 2 0.50 not z31.d, p7/m, z31.d
# CHECK-NEXT: 1 2 0.50 not z31.h, p7/m, z31.h
# CHECK-NEXT: 1 2 0.50 not z31.s, p7/m, z31.s
-# CHECK-NEXT: 2 2 2.00 nots p0.b, p0/z, p0.b
-# CHECK-NEXT: 2 2 2.00 nots p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 2.00 nots p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 2.00 nots p15.b, p15/z, p15.b
# CHECK-NEXT: 1 1 1.00 orn p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 orn p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 2 2 2.00 orns p0.b, p0/z, p0.b, p0.b
-# CHECK-NEXT: 2 2 2.00 orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 2.00 orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 2.00 orns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 orr p0.b, p0/z, p0.b, p1.b
# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0x6
# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
@@ -3956,11 +3956,11 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 orr z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0x6
# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0xf9
-# CHECK-NEXT: 2 2 2.00 orrs p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 4 12 2.00 orv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 orv d0, p7, z31.d
-# CHECK-NEXT: 4 12 2.00 orv h0, p7, z31.h
-# CHECK-NEXT: 4 12 2.00 orv s0, p7, z31.s
+# CHECK-NEXT: 1 2 2.00 orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 12 2.00 orv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 orv d0, p7, z31.d
+# CHECK-NEXT: 1 12 2.00 orv h0, p7, z31.h
+# CHECK-NEXT: 1 12 2.00 orv s0, p7, z31.s
# CHECK-NEXT: 1 2 1.00 pfalse p15.b
# CHECK-NEXT: 1 2 1.00 pfirst p0.b, p15, p0.b
# CHECK-NEXT: 1 2 1.00 pfirst p15.b, p15, p15.b
@@ -4010,45 +4010,45 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl64
# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl7
# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl8
-# CHECK-NEXT: 2 3 2.00 ptrues p0.b, pow2
-# CHECK-NEXT: 2 3 2.00 ptrues p0.d, pow2
-# CHECK-NEXT: 2 3 2.00 ptrues p0.h, pow2
-# CHECK-NEXT: 2 3 2.00 ptrues p0.s, pow2
-# CHECK-NEXT: 2 3 2.00 ptrues p15.b
-# CHECK-NEXT: 2 3 2.00 ptrues p15.d
-# CHECK-NEXT: 2 3 2.00 ptrues p15.h
-# CHECK-NEXT: 2 3 2.00 ptrues p15.s
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #14
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #15
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #16
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #17
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #18
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #19
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #20
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #21
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #22
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #23
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #24
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #25
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #26
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #27
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, #28
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, mul3
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, mul4
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl1
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl128
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl16
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl2
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl256
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl3
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl32
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl4
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl5
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl6
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl64
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl7
-# CHECK-NEXT: 2 3 2.00 ptrues p7.s, vl8
+# CHECK-NEXT: 1 3 2.00 ptrues p0.b, pow2
+# CHECK-NEXT: 1 3 2.00 ptrues p0.d, pow2
+# CHECK-NEXT: 1 3 2.00 ptrues p0.h, pow2
+# CHECK-NEXT: 1 3 2.00 ptrues p0.s, pow2
+# CHECK-NEXT: 1 3 2.00 ptrues p15.b
+# CHECK-NEXT: 1 3 2.00 ptrues p15.d
+# CHECK-NEXT: 1 3 2.00 ptrues p15.h
+# CHECK-NEXT: 1 3 2.00 ptrues p15.s
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #14
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #15
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #16
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #17
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #18
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #19
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #20
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #21
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #22
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #23
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #24
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #25
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #26
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #27
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, #28
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, mul3
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, mul4
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl1
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl128
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl16
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl2
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl256
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl3
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl32
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl4
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl5
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl6
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl64
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl7
+# CHECK-NEXT: 1 3 2.00 ptrues p7.s, vl8
# CHECK-NEXT: 1 2 1.00 punpkhi p0.h, p0.b
# CHECK-NEXT: 1 2 1.00 punpkhi p15.h, p15.b
# CHECK-NEXT: 1 2 1.00 punpklo p0.h, p0.b
@@ -4058,11 +4058,11 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 rbit z0.h, p7/m, z31.h
# CHECK-NEXT: 1 2 0.50 rbit z0.s, p7/m, z31.s
# CHECK-NEXT: 1 2 1.00 * U rdffr p0.b
-# CHECK-NEXT: 2 3 2.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 3 2.00 * U rdffr p0.b, p0/z
# CHECK-NEXT: 1 2 1.00 * U rdffr p15.b
-# CHECK-NEXT: 2 3 2.00 * U rdffr p15.b, p15/z
-# CHECK-NEXT: 1 4 0.50 U rdffrs p0.b, p0/z
-# CHECK-NEXT: 1 4 0.50 U rdffrs p15.b, p15/z
+# CHECK-NEXT: 1 3 2.00 * U rdffr p15.b, p15/z
+# CHECK-NEXT: 1 4 3.00 U rdffrs p0.b, p0/z
+# CHECK-NEXT: 1 4 3.00 U rdffrs p15.b, p15/z
# CHECK-NEXT: 1 2 1.00 rdvl x0, #0
# CHECK-NEXT: 1 2 1.00 rdvl x21, #-32
# CHECK-NEXT: 1 2 1.00 rdvl x23, #31
@@ -4081,18 +4081,18 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 sabd z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 sabd z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 sabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 5 14 2.00 saddv d0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 saddv d0, p7, z31.h
-# CHECK-NEXT: 4 10 2.00 saddv d0, p7, z31.s
+# CHECK-NEXT: 1 14 2.00 saddv d0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 saddv d0, p7, z31.h
+# CHECK-NEXT: 1 10 2.00 saddv d0, p7, z31.s
# CHECK-NEXT: 1 3 1.00 scvtf z0.d, p0/m, z0.d
-# CHECK-NEXT: 4 6 4.00 scvtf z0.h, p0/m, z0.h
-# CHECK-NEXT: 2 4 2.00 scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 6 4.00 scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 scvtf z0.h, p0/m, z0.s
# CHECK-NEXT: 1 3 1.00 scvtf z0.s, p0/m, z0.d
-# CHECK-NEXT: 2 4 2.00 scvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 20 7.00 sdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 7.00 sdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 20 7.00 sdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 7.00 sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 2.00 scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 20 20.00 sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 20 20.00 sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 sdivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z31.b
@@ -4114,9 +4114,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 smax z31.h, z31.h, #127
# CHECK-NEXT: 1 2 0.50 smax z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 smax z31.s, z31.s, #127
-# CHECK-NEXT: 5 14 2.00 smaxv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 smaxv h0, p7, z31.h
-# CHECK-NEXT: 4 10 2.00 smaxv s0, p7, z31.s
+# CHECK-NEXT: 1 14 2.00 smaxv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 smaxv h0, p7, z31.h
+# CHECK-NEXT: 1 10 2.00 smaxv s0, p7, z31.s
# CHECK-NEXT: 1 2 0.50 smin z0.b, z0.b, #-128
# CHECK-NEXT: 1 2 0.50 smin z0.d, z0.d, #-128
# CHECK-NEXT: 1 2 0.50 smin z0.h, z0.h, #-128
@@ -4129,12 +4129,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 smin z31.h, z31.h, #127
# CHECK-NEXT: 1 2 0.50 smin z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 smin z31.s, z31.s, #127
-# CHECK-NEXT: 5 14 2.00 sminv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 sminv h0, p7, z31.h
-# CHECK-NEXT: 4 10 2.00 sminv s0, p7, z31.s
+# CHECK-NEXT: 1 14 2.00 sminv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 sminv h0, p7, z31.h
+# CHECK-NEXT: 1 10 2.00 sminv s0, p7, z31.s
# CHECK-NEXT: 1 3 0.50 smmla z0.s, z1.b, z2.b
# CHECK-NEXT: 1 4 1.00 smulh z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: 2 5 2.00 smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 5 2.00 smulh z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 4 1.00 smulh z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 4 1.00 smulh z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 3 1.00 splice z31.b, p7, z31.b, z31.b
@@ -4174,10 +4174,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 1.00 sqdecd z0.d
-# CHECK-NEXT: 1 2 1.00 sqdecd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 1.00 sqdecd z0.d, pow2
-# CHECK-NEXT: 1 2 1.00 sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech x0
# CHECK-NEXT: 1 2 1.00 sqdech x0, #14
# CHECK-NEXT: 1 2 1.00 sqdech x0, all, mul #16
@@ -4187,10 +4187,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 1.00 sqdech z0.h
-# CHECK-NEXT: 1 2 1.00 sqdech z0.h, all, mul #16
-# CHECK-NEXT: 1 2 1.00 sqdech z0.h, pow2
-# CHECK-NEXT: 1 2 1.00 sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecp x0, p0.b
# CHECK-NEXT: 1 2 1.00 sqdecp x0, p0.d
# CHECK-NEXT: 1 2 1.00 sqdecp x0, p0.h
@@ -4199,9 +4199,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecp xzr, p15.d, wzr
# CHECK-NEXT: 1 2 1.00 sqdecp xzr, p15.h, wzr
# CHECK-NEXT: 1 2 1.00 sqdecp xzr, p15.s, wzr
-# CHECK-NEXT: 3 7 2.00 sqdecp z0.d, p0.d
-# CHECK-NEXT: 3 7 2.00 sqdecp z0.h, p0.h
-# CHECK-NEXT: 3 7 2.00 sqdecp z0.s, p0.s
+# CHECK-NEXT: 2 7 2.00 sqdecp z0.d, p0.d
+# CHECK-NEXT: 2 7 2.00 sqdecp z0.h, p0.h
+# CHECK-NEXT: 2 7 2.00 sqdecp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 sqdecw x0
# CHECK-NEXT: 1 2 1.00 sqdecw x0, #14
# CHECK-NEXT: 1 2 1.00 sqdecw x0, all, mul #16
@@ -4211,10 +4211,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 1.00 sqdecw z0.s
-# CHECK-NEXT: 1 2 1.00 sqdecw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 1.00 sqdecw z0.s, pow2
-# CHECK-NEXT: 1 2 1.00 sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincb x0
# CHECK-NEXT: 1 2 1.00 sqincb x0, #14
# CHECK-NEXT: 1 2 1.00 sqincb x0, all, mul #16
@@ -4233,10 +4233,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 1.00 sqincd z0.d
-# CHECK-NEXT: 1 2 1.00 sqincd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 1.00 sqincd z0.d, pow2
-# CHECK-NEXT: 1 2 1.00 sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch x0
# CHECK-NEXT: 1 2 1.00 sqinch x0, #14
# CHECK-NEXT: 1 2 1.00 sqinch x0, all, mul #16
@@ -4246,10 +4246,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 1.00 sqinch z0.h
-# CHECK-NEXT: 1 2 1.00 sqinch z0.h, all, mul #16
-# CHECK-NEXT: 1 2 1.00 sqinch z0.h, pow2
-# CHECK-NEXT: 1 2 1.00 sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincp x0, p0.b
# CHECK-NEXT: 1 2 1.00 sqincp x0, p0.d
# CHECK-NEXT: 1 2 1.00 sqincp x0, p0.h
@@ -4258,9 +4258,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincp xzr, p15.d, wzr
# CHECK-NEXT: 1 2 1.00 sqincp xzr, p15.h, wzr
# CHECK-NEXT: 1 2 1.00 sqincp xzr, p15.s, wzr
-# CHECK-NEXT: 3 7 2.00 sqincp z0.d, p0.d
-# CHECK-NEXT: 3 7 2.00 sqincp z0.h, p0.h
-# CHECK-NEXT: 3 7 2.00 sqincp z0.s, p0.s
+# CHECK-NEXT: 2 7 2.00 sqincp z0.d, p0.d
+# CHECK-NEXT: 2 7 2.00 sqincp z0.h, p0.h
+# CHECK-NEXT: 2 7 2.00 sqincp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 sqincw x0
# CHECK-NEXT: 1 2 1.00 sqincw x0, #14
# CHECK-NEXT: 1 2 1.00 sqincw x0, all, mul #16
@@ -4270,10 +4270,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 1.00 sqincw z0.s
-# CHECK-NEXT: 1 2 1.00 sqincw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 1.00 sqincw z0.s, pow2
-# CHECK-NEXT: 1 2 1.00 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s, pow2, mul #16
# CHECK-NEXT: 1 2 0.50 sqsub z0.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 sqsub z0.b, z0.b, z0.b
# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, #0
@@ -4292,148 +4292,148 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0, x0]
-# CHECK-NEXT: 2 6 0.50 * st1b { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: 2 6 0.50 * st1b { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: 2 6 0.50 * st1b { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1b { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 2 6 2.00 * st1b { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 2 6 2.00 * st1b { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0]
-# CHECK-NEXT: 2 6 0.50 * st1b { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1b { z0.d }, p7, [z0.d]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.h }, p0, [x0, x0]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.h }, p0, [x0]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.s }, p0, [x0, x0]
-# CHECK-NEXT: 4 10 1.00 * st1b { z0.s }, p0, [x0, z0.s, sxtw]
-# CHECK-NEXT: 4 10 1.00 * st1b { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 10 4.00 * st1b { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 10 4.00 * st1b { z0.s }, p0, [x0, z0.s, uxtw]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.s }, p0, [x0]
-# CHECK-NEXT: 4 10 1.00 * st1b { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 10 4.00 * st1b { z0.s }, p7, [z0.s]
# CHECK-NEXT: 2 2 0.50 * st1b { z21.b }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1b { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1b { z21.h }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1b { z21.s }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1b { z31.b }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1b { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 6 0.50 * st1b { z31.d }, p7, [z31.d, #31]
+# CHECK-NEXT: 2 6 2.00 * st1b { z31.d }, p7, [z31.d, #31]
# CHECK-NEXT: 2 2 0.50 * st1b { z31.h }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1b { z31.s }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 10 1.00 * st1b { z31.s }, p7, [z31.s, #31]
+# CHECK-NEXT: 2 10 4.00 * st1b { z31.s }, p7, [z31.s, #31]
# CHECK-NEXT: 2 2 0.50 * st1d { z0.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p0, [x0, z0.d, lsl #3]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: 2 2 0.50 * st1d { z0.d }, p0, [x0]
-# CHECK-NEXT: 2 6 0.50 * st1d { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1d { z0.d }, p7, [z0.d]
# CHECK-NEXT: 2 2 0.50 * st1d { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1d { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 6 0.50 * st1d { z31.d }, p7, [z31.d, #248]
+# CHECK-NEXT: 2 6 2.00 * st1d { z31.d }, p7, [z31.d, #248]
# CHECK-NEXT: 3 2 0.50 * st1h { z0.d }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p0, [x0, z0.d, lsl #1]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: 2 2 0.50 * st1h { z0.d }, p0, [x0]
-# CHECK-NEXT: 2 6 0.50 * st1h { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1h { z0.d }, p7, [z0.d]
# CHECK-NEXT: 3 2 0.50 * st1h { z0.h }, p0, [x0, x0, lsl #1]
# CHECK-NEXT: 2 2 0.50 * st1h { z0.h }, p0, [x0]
# CHECK-NEXT: 3 2 0.50 * st1h { z0.s }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 4 10 1.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
-# CHECK-NEXT: 4 10 1.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw]
-# CHECK-NEXT: 4 10 1.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
-# CHECK-NEXT: 4 10 1.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 10 4.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+# CHECK-NEXT: 2 10 4.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 10 4.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+# CHECK-NEXT: 2 10 4.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw]
# CHECK-NEXT: 2 2 0.50 * st1h { z0.s }, p0, [x0]
-# CHECK-NEXT: 4 10 1.00 * st1h { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 10 4.00 * st1h { z0.s }, p7, [z0.s]
# CHECK-NEXT: 2 2 0.50 * st1h { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1h { z21.h }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1h { z21.s }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1h { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 6 0.50 * st1h { z31.d }, p7, [z31.d, #62]
+# CHECK-NEXT: 2 6 2.00 * st1h { z31.d }, p7, [z31.d, #62]
# CHECK-NEXT: 2 2 0.50 * st1h { z31.h }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1h { z31.s }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 10 1.00 * st1h { z31.s }, p7, [z31.s, #62]
+# CHECK-NEXT: 2 10 4.00 * st1h { z31.s }, p7, [z31.s, #62]
# CHECK-NEXT: 2 2 0.50 * st1w { z0.d }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p0, [x0, z0.d, lsl #2]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: 2 2 0.50 * st1w { z0.d }, p0, [x0]
-# CHECK-NEXT: 2 6 0.50 * st1w { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 6 2.00 * st1w { z0.d }, p7, [z0.d]
# CHECK-NEXT: 2 2 0.50 * st1w { z0.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 4 10 1.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
-# CHECK-NEXT: 4 10 1.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw]
-# CHECK-NEXT: 4 10 1.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
-# CHECK-NEXT: 4 10 1.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 10 4.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+# CHECK-NEXT: 2 10 4.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 2 10 4.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: 2 10 4.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw]
# CHECK-NEXT: 2 2 0.50 * st1w { z0.s }, p0, [x0]
-# CHECK-NEXT: 4 10 1.00 * st1w { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 10 4.00 * st1w { z0.s }, p7, [z0.s]
# CHECK-NEXT: 2 2 0.50 * st1w { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1w { z21.s }, p5, [x10, #5, mul vl]
# CHECK-NEXT: 2 2 0.50 * st1w { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 2 6 0.50 * st1w { z31.d }, p7, [z31.d, #124]
+# CHECK-NEXT: 2 6 2.00 * st1w { z31.d }, p7, [z31.d, #124]
# CHECK-NEXT: 2 2 0.50 * st1w { z31.s }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: 4 10 1.00 * st1w { z31.s }, p7, [z31.s, #124]
-# CHECK-NEXT: 2 4 0.50 * st2b { z0.b, z1.b }, p0, [x0, x0]
-# CHECK-NEXT: 2 4 0.50 * st2b { z0.b, z1.b }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2b { z5.b, z6.b }, p3, [x17, x16]
-# CHECK-NEXT: 2 4 0.50 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 4 0.50 * st2d { z0.d, z1.d }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 4 0.50 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 4 0.50 * st2h { z0.h, z1.h }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 4 0.50 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 2 4 0.50 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 4 0.50 * st2w { z0.s, z1.s }, p0, [x0]
-# CHECK-NEXT: 2 4 0.50 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 4 0.50 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 15 7 2.50 * st3b { z0.b - z2.b }, p0, [x0, x0]
-# CHECK-NEXT: 10 7 2.50 * st3b { z0.b - z2.b }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * st3b { z5.b - z7.b }, p3, [x17, x16]
-# CHECK-NEXT: 15 7 2.50 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 10 7 2.50 * st3d { z0.d - z2.d }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 15 7 2.50 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 10 7 2.50 * st3h { z0.h - z2.h }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 15 7 2.50 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 10 7 2.50 * st3w { z0.s - z2.s }, p0, [x0]
-# CHECK-NEXT: 10 7 2.50 * st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: 10 7 2.50 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: 15 7 2.50 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: 27 11 4.50 * st4b { z0.b - z3.b }, p0, [x0, x0]
-# CHECK-NEXT: 18 19 4.50 * st4b { z0.b - z3.b }, p0, [x0]
-# CHECK-NEXT: 18 19 4.50 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 19 4.50 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * st4b { z5.b - z8.b }, p3, [x17, x16]
-# CHECK-NEXT: 27 11 4.50 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: 18 19 4.50 * st4d { z0.d - z3.d }, p0, [x0]
-# CHECK-NEXT: 18 19 4.50 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 19 4.50 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: 27 11 4.50 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: 18 19 4.50 * st4h { z0.h - z3.h }, p0, [x0]
-# CHECK-NEXT: 18 19 4.50 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 19 4.50 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: 27 11 4.50 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: 18 19 4.50 * st4w { z0.s - z3.s }, p0, [x0]
-# CHECK-NEXT: 18 19 4.50 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: 18 19 4.50 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: 27 11 4.50 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 10 4.00 * st1w { z31.s }, p7, [z31.s, #124]
+# CHECK-NEXT: 2 4 1.00 * st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 4 1.00 * st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: 2 4 1.00 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: 2 4 1.00 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 4 1.00 * st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: 2 4 1.00 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 4 1.00 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 4 1.00 * st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: 2 4 1.00 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 4 1.00 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 2 4 1.00 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 4 1.00 * st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: 2 4 1.00 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 2 4 1.00 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 7 4.50 * st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 7 4.50 * st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 2 7 4.50 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 2 7 4.50 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 7 4.50 * st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 3 7 4.50 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 7 4.50 * st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 2 7 4.50 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 2 7 4.50 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 7 4.50 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 7 4.50 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 7 4.50 * st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 2 7 4.50 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 2 7 4.50 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 7 4.50 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 7 4.50 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 7 4.50 * st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 2 7 4.50 * st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 2 7 4.50 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 3 7 4.50 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 11 9.00 * st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 11 9.00 * st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 2 11 9.00 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 2 11 9.00 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 11 9.00 * st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 3 11 9.00 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 11 9.00 * st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 2 11 9.00 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 2 11 9.00 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 11 9.00 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 3 11 9.00 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 11 9.00 * st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 2 11 9.00 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 2 11 9.00 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 11 9.00 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 11 9.00 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 11 9.00 * st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 2 11 9.00 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 2 11 9.00 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 3 11 9.00 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: 2 2 0.50 * stnt1b { z21.b }, p5, [x10, #7, mul vl]
@@ -4514,7 +4514,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 subr z31.d, z31.d, #65280
# CHECK-NEXT: 1 2 0.50 subr z31.h, z31.h, #65280
# CHECK-NEXT: 1 2 0.50 subr z31.s, z31.s, #65280
-# CHECK-NEXT: 1 3 0.25 sudot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 1 3 0.50 sudot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 2 0.50 sunpkhi z31.d, z31.s
# CHECK-NEXT: 1 2 0.50 sunpkhi z31.h, z31.b
# CHECK-NEXT: 1 2 0.50 sunpkhi z31.s, z31.h
@@ -4557,18 +4557,18 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 uabd z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 uabd z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 uabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 5 14 2.00 uaddv d0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 uaddv d0, p7, z31.h
-# CHECK-NEXT: 4 10 2.00 uaddv d0, p7, z31.s
+# CHECK-NEXT: 1 14 2.00 uaddv d0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 uaddv d0, p7, z31.h
+# CHECK-NEXT: 1 10 2.00 uaddv d0, p7, z31.s
# CHECK-NEXT: 1 3 1.00 ucvtf z0.d, p0/m, z0.d
-# CHECK-NEXT: 4 6 4.00 ucvtf z0.h, p0/m, z0.h
-# CHECK-NEXT: 2 4 2.00 ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 6 4.00 ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 4 2.00 ucvtf z0.h, p0/m, z0.s
# CHECK-NEXT: 1 3 1.00 ucvtf z0.s, p0/m, z0.d
-# CHECK-NEXT: 2 4 2.00 ucvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 20 7.00 udiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 7.00 udiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 20 7.00 udivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 7.00 udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 2.00 ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 20 20.00 udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 20 20.00 udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 udivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z31.h
# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z31.b
@@ -4579,21 +4579,21 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 umax z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 umax z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 umax z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 5 14 2.00 umaxv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 umaxv h0, p7, z31.h
-# CHECK-NEXT: 4 10 2.00 umaxv s0, p7, z31.s
+# CHECK-NEXT: 1 14 2.00 umaxv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 umaxv h0, p7, z31.h
+# CHECK-NEXT: 1 10 2.00 umaxv s0, p7, z31.s
# CHECK-NEXT: 1 2 0.50 umin z0.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 umin z31.b, p7/m, z31.b, z31.b
# CHECK-NEXT: 1 2 0.50 umin z31.b, z31.b, #255
# CHECK-NEXT: 1 2 0.50 umin z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 umin z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 umin z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 5 14 2.00 uminv b0, p7, z31.b
-# CHECK-NEXT: 4 12 2.00 uminv h0, p7, z31.h
-# CHECK-NEXT: 4 10 2.00 uminv s0, p7, z31.s
+# CHECK-NEXT: 1 14 2.00 uminv b0, p7, z31.b
+# CHECK-NEXT: 1 12 2.00 uminv h0, p7, z31.h
+# CHECK-NEXT: 1 10 2.00 uminv s0, p7, z31.s
# CHECK-NEXT: 1 3 0.50 ummla z0.s, z1.b, z2.b
# CHECK-NEXT: 1 4 1.00 umulh z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: 2 5 2.00 umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 5 2.00 umulh z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: 1 4 1.00 umulh z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: 1 4 1.00 umulh z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: 1 2 0.50 uqadd z0.b, z0.b, #0
@@ -4629,10 +4629,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdecd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecd x0, pow2
# CHECK-NEXT: 1 2 1.00 uqdecd x0, vl1
-# CHECK-NEXT: 1 2 1.00 uqdecd z0.d
-# CHECK-NEXT: 1 2 1.00 uqdecd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 1.00 uqdecd z0.d, pow2
-# CHECK-NEXT: 1 2 1.00 uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqdech w0
# CHECK-NEXT: 1 2 1.00 uqdech w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdech w0, pow2
@@ -4642,10 +4642,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdech x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdech x0, pow2
# CHECK-NEXT: 1 2 1.00 uqdech x0, vl1
-# CHECK-NEXT: 1 2 1.00 uqdech z0.h
-# CHECK-NEXT: 1 2 1.00 uqdech z0.h, all, mul #16
-# CHECK-NEXT: 1 2 1.00 uqdech z0.h, pow2
-# CHECK-NEXT: 1 2 1.00 uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecp wzr, p15.b
# CHECK-NEXT: 1 2 1.00 uqdecp wzr, p15.d
# CHECK-NEXT: 1 2 1.00 uqdecp wzr, p15.h
@@ -4654,9 +4654,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdecp x0, p0.d
# CHECK-NEXT: 1 2 1.00 uqdecp x0, p0.h
# CHECK-NEXT: 1 2 1.00 uqdecp x0, p0.s
-# CHECK-NEXT: 3 7 2.00 uqdecp z0.d, p0.d
-# CHECK-NEXT: 3 7 2.00 uqdecp z0.h, p0.h
-# CHECK-NEXT: 3 7 2.00 uqdecp z0.s, p0.s
+# CHECK-NEXT: 2 7 2.00 uqdecp z0.d, p0.d
+# CHECK-NEXT: 2 7 2.00 uqdecp z0.h, p0.h
+# CHECK-NEXT: 2 7 2.00 uqdecp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 uqdecw w0
# CHECK-NEXT: 1 2 1.00 uqdecw w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecw w0, pow2
@@ -4666,10 +4666,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdecw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecw x0, pow2
# CHECK-NEXT: 1 2 1.00 uqdecw x0, vl1
-# CHECK-NEXT: 1 2 1.00 uqdecw z0.s
-# CHECK-NEXT: 1 2 1.00 uqdecw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 1.00 uqdecw z0.s, pow2
-# CHECK-NEXT: 1 2 1.00 uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqincb w0
# CHECK-NEXT: 1 2 1.00 uqincb w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincb w0, pow2
@@ -4688,10 +4688,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqincd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincd x0, pow2
# CHECK-NEXT: 1 2 1.00 uqincd x0, vl1
-# CHECK-NEXT: 1 2 1.00 uqincd z0.d
-# CHECK-NEXT: 1 2 1.00 uqincd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 1.00 uqincd z0.d, pow2
-# CHECK-NEXT: 1 2 1.00 uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqinch w0
# CHECK-NEXT: 1 2 1.00 uqinch w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqinch w0, pow2
@@ -4701,10 +4701,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqinch x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqinch x0, pow2
# CHECK-NEXT: 1 2 1.00 uqinch x0, vl1
-# CHECK-NEXT: 1 2 1.00 uqinch z0.h
-# CHECK-NEXT: 1 2 1.00 uqinch z0.h, all, mul #16
-# CHECK-NEXT: 1 2 1.00 uqinch z0.h, pow2
-# CHECK-NEXT: 1 2 1.00 uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqincp wzr, p15.b
# CHECK-NEXT: 1 2 1.00 uqincp wzr, p15.d
# CHECK-NEXT: 1 2 1.00 uqincp wzr, p15.h
@@ -4713,9 +4713,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqincp x0, p0.d
# CHECK-NEXT: 1 2 1.00 uqincp x0, p0.h
# CHECK-NEXT: 1 2 1.00 uqincp x0, p0.s
-# CHECK-NEXT: 3 7 2.00 uqincp z0.d, p0.d
-# CHECK-NEXT: 3 7 2.00 uqincp z0.h, p0.h
-# CHECK-NEXT: 3 7 2.00 uqincp z0.s, p0.s
+# CHECK-NEXT: 2 7 2.00 uqincp z0.d, p0.d
+# CHECK-NEXT: 2 7 2.00 uqincp z0.h, p0.h
+# CHECK-NEXT: 2 7 2.00 uqincp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 uqincw w0
# CHECK-NEXT: 1 2 1.00 uqincw w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincw w0, pow2
@@ -4725,10 +4725,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqincw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincw x0, pow2
# CHECK-NEXT: 1 2 1.00 uqincw x0, vl1
-# CHECK-NEXT: 1 2 1.00 uqincw z0.s
-# CHECK-NEXT: 1 2 1.00 uqincw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 1.00 uqincw z0.s, pow2
-# CHECK-NEXT: 1 2 1.00 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s, pow2, mul #16
# CHECK-NEXT: 1 2 0.50 uqsub z0.b, z0.b, #0
# CHECK-NEXT: 1 2 0.50 uqsub z0.b, z0.b, z0.b
# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, #0
@@ -4744,8 +4744,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 uqsub z31.d, z31.d, #65280
# CHECK-NEXT: 1 2 0.50 uqsub z31.h, z31.h, #65280
# CHECK-NEXT: 1 2 0.50 uqsub z31.s, z31.s, #65280
-# CHECK-NEXT: 1 3 0.25 usdot z0.s, z1.b, z31.b
-# CHECK-NEXT: 1 3 0.25 usdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 1 3 0.50 usdot z0.s, z1.b, z31.b
+# CHECK-NEXT: 1 3 0.50 usdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 3 0.50 usmmla z0.s, z1.b, z2.b
# CHECK-NEXT: 1 2 0.50 uunpkhi z31.d, z31.s
# CHECK-NEXT: 1 2 0.50 uunpkhi z31.h, z31.b
@@ -4781,10 +4781,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 uzp2 z31.d, z31.d, z31.d
# CHECK-NEXT: 1 2 0.50 uzp2 z31.h, z31.h, z31.h
# CHECK-NEXT: 1 2 0.50 uzp2 z31.s, z31.s, z31.s
-# CHECK-NEXT: 2 3 2.00 whilele p0.b, w30, wzr
-# CHECK-NEXT: 2 3 2.00 whilelo p15.d, xzr, x30
-# CHECK-NEXT: 2 3 2.00 whilels p0.h, w30, wzr
-# CHECK-NEXT: 2 3 2.00 whilelt p15.s, xzr, x30
+# CHECK-NEXT: 1 3 2.00 whilele p0.b, w30, wzr
+# CHECK-NEXT: 1 3 2.00 whilelo p15.d, xzr, x30
+# CHECK-NEXT: 1 3 2.00 whilels p0.h, w30, wzr
+# CHECK-NEXT: 1 3 2.00 whilelt p15.s, xzr, x30
# CHECK-NEXT: 1 2 1.00 * U wrffr p0.b
# CHECK-NEXT: 1 2 1.00 * U wrffr p15.b
# CHECK-NEXT: 1 2 1.00 zip1 p0.b, p0.b, p0.b
@@ -4842,7 +4842,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11]
-# CHECK-NEXT: - - - - - - - 88.67 500.67 500.67 797.50 2.50 92.50 92.50 1252.00 921.00 178.50 181.50
+# CHECK-NEXT: - - - - - - - 427.00 1140.50 1140.50 802.50 7.50 196.00 196.00 1778.25 1363.25 541.75 541.75
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions:
@@ -5271,9 +5271,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decp xzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decp xzr, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decp xzr, p15.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - decp z31.d, p15.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - decp z31.h, p15.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - decp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - decp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - decp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - decp z31.s, p15.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0, all, mul #16
@@ -5346,9 +5346,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 - - fadda d0, p7, d0, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 18.00 - - - fadda h0, p7, h0, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 10.00 - - - fadda s0, p7, s0, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.25 2.25 0.25 0.25 faddv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - faddv d0, p7, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - faddv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.75 2.75 0.25 0.25 faddv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - faddv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z0.d, p0/m, z0.d, z0.d, #90
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z0.h, p0/m, z0.h, z0.h, #90
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z0.s, p0/m, z0.s, z0.s, #90
@@ -5417,25 +5417,25 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvt z0.s, p0/m, z0.d
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvt z0.s, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs z0.d, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzs z0.d, p0/m, z0.s
# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzs z0.h, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzs z0.s, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzs z0.s, p0/m, z0.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu z0.d, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzu z0.d, p0/m, z0.s
# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzu z0.h, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzu z0.s, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzu z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - fdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 10.00 - - - fdiv z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - fdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - fdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 10.00 - - - fdivr z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - - - fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - - - fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - - - fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - - - fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - - - fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - - - fdivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fexpa z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fexpa z0.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fexpa z0.s, z31.s
@@ -5460,12 +5460,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z31.d, p7/m, z31.d, #1.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z31.h, p7/m, z31.h, #1.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.25 2.25 0.25 0.25 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fmaxnmv d0, p7, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fmaxnmv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.75 2.75 0.25 0.25 fmaxnmv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.25 2.25 0.25 0.25 fmaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fmaxv d0, p7, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fmaxv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.75 2.75 0.25 0.25 fmaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fmaxv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.d, p0/m, z0.d, #0.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.h, p0/m, z0.h, #0.0
@@ -5484,12 +5484,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z31.d, p7/m, z31.d, #1.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z31.h, p7/m, z31.h, #1.0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.25 2.25 0.25 0.25 fminnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fminnmv d0, p7, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fminnmv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.75 2.75 0.25 0.25 fminnmv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.25 2.25 0.25 0.25 fminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fminnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fminv d0, p7, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fminv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.75 2.75 0.25 0.25 fminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fminv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.d, z1.d, z7.d[1]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.h, p7/m, z1.h, z31.h
@@ -5550,8 +5550,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmsb z0.h, p7/m, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmsb z0.s, p7/m, z1.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpe z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - frecpe z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - frecpe z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpe z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpe z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frecps z0.d, z1.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frecps z0.h, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frecps z0.s, z1.s, z31.s
@@ -5580,17 +5580,17 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintz z31.h, p7/m, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintz z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frsqrte z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - frsqrte z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - frsqrte z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frsqrte z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frsqrte z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frsqrts z0.d, z1.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frsqrts z0.h, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frsqrts z0.s, z1.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fscale z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fscale z0.h, p7/m, z0.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 10.00 - - - fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - - - fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - - - fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - - - fsqrt z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.d, p0/m, z0.d, #0.5
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.d, p7/m, z0.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.d, z1.d, z31.d
@@ -5631,15 +5631,15 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incd x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incd x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incd x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - incd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - incd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incd z0.d, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - inch z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - inch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - inch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - inch z0.h, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp x0, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp x0, p0.h
@@ -5648,48 +5648,48 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp xzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp xzr, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp xzr, p15.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - incp z31.d, p15.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - incp z31.h, p15.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - incp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - incp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - incp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - incp z31.s, p15.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - incw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - incw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incw z0.s, all, mul #16
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z0.b, #0, #0
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - index z0.d, #0, #0
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z0.h, #0, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z0.h, w0, w0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z0.h, w0, w0
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z0.s, #0, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z21.b, w10, w21
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z21.b, w10, w21
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z21.d, x10, x21
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z21.s, w10, w21
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z23.b, #13, w8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z23.b, w13, #8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z21.s, w10, w21
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.b, #13, w8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.b, w13, #8
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z23.d, #13, x8
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z23.d, x13, #8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z23.h, #13, w8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z23.h, w13, #8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z23.s, #13, w8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z23.s, w13, #8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.h, #13, w8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.h, w13, #8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.s, #13, w8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.s, w13, #8
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z31.b, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.b, #-1, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.b, wzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.b, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.b, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.b, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.b, wzr, wzr
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - index z31.d, #-1, #-1
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z31.d, #-1, xzr
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z31.d, xzr, #-1
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z31.d, xzr, xzr
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z31.h, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.h, #-1, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.h, wzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.h, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.h, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.h, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.h, wzr, wzr
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z31.s, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.s, #-1, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.s, wzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - index z31.s, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.s, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.s, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.s, wzr, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.b, w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.d, x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.h, w0
@@ -5722,66 +5722,66 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.b }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.b }, p0/z, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1b { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z21.b }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z21.h }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z23.d }, p3/z, [x13, x8]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1b { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1b { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1b { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z5.h }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1d { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1d { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1d { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1d { z31.d }, p7/z, [z31.d, #248]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z0.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z0.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1h { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z21.h }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1h { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1rb { z0.b }, p0/z, [x0]
@@ -5837,85 +5837,85 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1rw { z31.d }, p7/z, [sp, #252]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1rw { z31.s }, p7/z, [sp, #252]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sb { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z0.h }, p0/z, [sp, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z0.h }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z0.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z0.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sb { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z21.s }, p5/z, [x10, x21]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z23.d }, p3/z, [x13, x8]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sb { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sb { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sb { z31.s }, p7/z, [z31.s, #31]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sh { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sh { z0.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sh { z31.d }, p7/z, [z31.d, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1sh { z31.s }, p7/z, [z31.s, #62]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sw { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sw { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1sw { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z0.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z0.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z0.s }, p0/z, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z21.d }, p5/z, [x10, #5, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z21.s }, p5/z, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z31.d }, p7/z, [sp, z31.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ld1w { z31.d }, p7/z, [z31.d, #124]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z31.s }, p7/z, [z31.s, #124]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z0.b, z1.b }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z0.b, z1.b }, p0/z, [x0]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
@@ -5926,163 +5926,163 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 1.00 1.00 - - ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z0.h, z1.h }, p0/z, [x0]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 1.00 1.00 - - ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z0.s, z1.s }, p0/z, [x0]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3b { z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3b { z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3b { z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3d { z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3h { z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3w { z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 1.50 1.50 - - ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 1.50 1.50 - - ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4b { z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4b { z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4b { z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4d { z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4h { z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4w { z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 1.00 1.00 2.00 2.00 - - ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z0.d }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z0.h }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z0.s }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1b { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z31.b }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z31.h }, p7/z, [sp]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z31.s }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1b { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1d { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1d { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1d { z31.d }, p7/z, [z31.d, #248]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1h { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1h { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1h { z31.d }, p7/z, [z31.d, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1h { z31.h }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1h { z31.s }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1h { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sb { z0.d }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sb { z0.h }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sb { z0.s }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sb { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sb { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sb { z31.d }, p7/z, [z31.d, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sb { z31.h }, p7/z, [sp]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sb { z31.s }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sb { z31.s }, p7/z, [z31.s, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sh { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sh { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z31.d }, p7/z, [z31.d, #62]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sh { z31.s }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sh { z31.s }, p7/z, [z31.s, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z0.d }, p0/z, [z0.d]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sw { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1sw { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1sw { z31.d }, p7/z, [z31.d, #124]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1w { z0.s }, p0/z, [z0.s]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z31.d }, p7/z, [sp, z31.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1w { z31.d }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z31.d }, p7/z, [z31.d, #124]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
-# CHECK-NEXT: - - - - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 0.50 0.50 ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1w { z31.s }, p7/z, [sp]
-# CHECK-NEXT: - - - - - - - 0.33 0.33 0.33 - - - - 0.25 0.25 0.25 0.25 ldff1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ldff1w { z31.s }, p7/z, [z31.s, #124]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ldnf1b { z0.b }, p0/z, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ldnf1b { z0.d }, p0/z, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ldnf1b { z0.h }, p0/z, [x0]
@@ -6492,8 +6492,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - rdffr p0.b, p0/z
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdffr p15.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - rdffr p15.b, p15/z
-# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - rdffrs p0.b, p0/z
-# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - rdffrs p15.b, p15/z
+# CHECK-NEXT: - - - - - - - - - - 3.00 3.00 - - - - - - rdffrs p0.b, p0/z
+# CHECK-NEXT: - - - - - - - - - - 3.00 3.00 - - - - - - rdffrs p15.b, p15/z
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdvl x0, #0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdvl x21, #-32
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdvl x23, #31
@@ -6512,18 +6512,18 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.25 2.75 0.25 0.75 saddv d0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 saddv d0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 saddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - saddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - saddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - saddv d0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - scvtf z0.d, p0/m, z0.d
# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - scvtf z0.h, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - scvtf z0.h, p0/m, z0.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - scvtf z0.s, p0/m, z0.d
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - scvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - sdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - sdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - sdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - sdivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sdot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sdot z0.d, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sdot z0.s, z1.b, z31.b
@@ -6545,9 +6545,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.h, z31.h, #127
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.s, z31.s, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.25 2.75 0.25 0.75 smaxv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 smaxv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 smaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - smaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - smaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - smaxv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.b, z0.b, #-128
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.d, z0.d, #-128
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.h, z0.h, #-128
@@ -6560,9 +6560,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.h, z31.h, #127
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.s, z31.s, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.25 2.75 0.25 0.75 sminv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 sminv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 sminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - sminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - sminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - sminv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smmla z0.s, z1.b, z2.b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - smulh z0.b, p7/m, z0.b, z31.b
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - smulh z0.d, p7/m, z0.d, z31.d
@@ -6605,10 +6605,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecd x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecd x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecd x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, all, mul #16
@@ -6618,10 +6618,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdech z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdech z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdech z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp x0, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp x0, p0.h
@@ -6630,9 +6630,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp xzr, p15.d, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp xzr, p15.h, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp xzr, p15.s, wzr
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - sqdecp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - sqdecp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - sqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqdecp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, all, mul #16
@@ -6642,10 +6642,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincb x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincb x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincb x0, all, mul #16
@@ -6664,10 +6664,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincd x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincd x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincd x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, all, mul #16
@@ -6677,10 +6677,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqinch z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqinch z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqinch z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp x0, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp x0, p0.h
@@ -6689,9 +6689,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp xzr, p15.d, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp xzr, p15.h, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp xzr, p15.s, wzr
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - sqincp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - sqincp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - sqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqincp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, all, mul #16
@@ -6701,10 +6701,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.b, z0.b, #0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.b, z0.b, z0.b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.d, z0.d, #0
@@ -6723,148 +6723,148 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.b }, p0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1b { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1b { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1b { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1b { z0.d }, p7, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.h }, p0, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.h }, p0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.s }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1b { z0.s }, p0, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1b { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1b { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1b { z0.s }, p0, [x0, z0.s, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.s }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1b { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1b { z0.s }, p7, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z21.b }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z21.h }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z21.s }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z31.b }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z31.d }, p7, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1b { z31.d }, p7, [z31.d, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z31.h }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z31.s }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1b { z31.s }, p7, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1b { z31.s }, p7, [z31.s, #31]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, z0.d, lsl #3]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z0.d }, p7, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1d { z31.d }, p7, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1d { z31.d }, p7, [z31.d, #248]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, z0.d, lsl #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z0.d }, p7, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 0.25 0.25 0.25 0.25 st1h { z0.h }, p0, [x0, x0, lsl #1]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.h }, p0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 0.25 0.25 0.25 0.25 st1h { z0.s }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1h { z0.s }, p0, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1h { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z0.s }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1h { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1h { z0.s }, p7, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z21.h }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z21.s }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z31.d }, p7, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1h { z31.d }, p7, [z31.d, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z31.h }, p7, [sp, #-1, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1h { z31.s }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1h { z31.s }, p7, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1h { z31.s }, p7, [z31.s, #62]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, z0.d, lsl #2]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, z0.d, sxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, z0.d, uxtw]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z0.d }, p7, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1w { z0.s }, p0, [x0, z0.s, sxtw]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1w { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, uxtw]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z0.s }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1w { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1w { z0.s }, p7, [z0.s]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z21.d }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z21.s }, p5, [x10, #5, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z31.d }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z31.d }, p7, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 1.00 1.00 st1w { z31.d }, p7, [z31.d, #124]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1w { z31.s }, p7, [sp, #-1, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st1w { z31.s }, p7, [z31.s, #124]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2b { z0.b, z1.b }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2b { z0.b, z1.b }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2b { z5.b, z6.b }, p3, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2d { z0.d, z1.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 0.25 0.25 0.25 0.25 st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2h { z0.h, z1.h }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 0.25 0.25 0.25 0.25 st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2w { z0.s, z1.s }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3b { z0.b - z2.b }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3b { z0.b - z2.b }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3b { z5.b - z7.b }, p3, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3d { z0.d - z2.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3h { z0.h - z2.h }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3w { z0.s - z2.s }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - - - 1.25 1.25 1.25 1.25 st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 2.50 2.50 - - 2.50 2.50 1.25 1.25 1.25 1.25 st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4b { z0.b - z3.b }, p0, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4b { z0.b - z3.b }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4b { z5.b - z8.b }, p3, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4d { z0.d - z3.d }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4h { z0.h - z3.h }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4w { z0.s - z3.s }, p0, [x0]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 st1w { z31.s }, p7, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 0.50 0.50 0.50 0.50 st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 0.50 0.50 0.50 0.50 st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - 2.25 2.25 2.25 2.25 st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - 4.50 4.50 2.25 2.25 2.25 2.25 st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - 4.50 4.50 4.50 4.50 st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 9.00 9.00 - - 9.00 9.00 4.50 4.50 4.50 4.50 st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 stnt1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 stnt1b { z0.b }, p0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 stnt1b { z21.b }, p5, [x10, #7, mul vl]
@@ -6945,7 +6945,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.d, z31.d, #65280
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.h, z31.h, #65280
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.s, z31.s, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sudot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sudot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpkhi z31.d, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpkhi z31.h, z31.b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpkhi z31.s, z31.h
@@ -6988,18 +6988,18 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.25 2.75 0.25 0.75 uaddv d0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 uaddv d0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 uaddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uaddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uaddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uaddv d0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - ucvtf z0.d, p0/m, z0.d
# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - ucvtf z0.h, p0/m, z0.h
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - ucvtf z0.h, p0/m, z0.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - ucvtf z0.s, p0/m, z0.d
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - ucvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - udiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - udiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - udivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 7.00 - - - udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - udivr z0.s, p7/m, z0.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - udot z0.d, z1.h, z15.h[1]
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - udot z0.d, z1.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - udot z0.s, z1.b, z31.b
@@ -7010,18 +7010,18 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.25 2.75 0.25 0.75 umaxv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 umaxv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 umaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - umaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - umaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - umaxv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z0.b, z0.b, #0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.b, p7/m, z31.b, z31.b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.b, z31.b, #255
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.d, p7/m, z31.d, z31.d
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.h, p7/m, z31.h, z31.h
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.25 2.75 0.25 0.75 uminv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 uminv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.75 2.75 0.25 0.25 uminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uminv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ummla z0.s, z1.b, z2.b
# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - umulh z0.b, p7/m, z0.b, z31.b
# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - umulh z0.d, p7/m, z0.d, z31.d
@@ -7060,10 +7060,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecd x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecd x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecd x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech w0, pow2
@@ -7073,10 +7073,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdech z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdech z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdech z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp wzr, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp wzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp wzr, p15.h
@@ -7085,9 +7085,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp x0, p0.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp x0, p0.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - uqdecp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - uqdecp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - uqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqdecp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw w0, pow2
@@ -7097,10 +7097,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincb w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincb w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincb w0, pow2
@@ -7119,10 +7119,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincd x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincd x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincd x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch w0, pow2
@@ -7132,10 +7132,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqinch z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqinch z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqinch z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp wzr, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp wzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp wzr, p15.h
@@ -7144,9 +7144,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp x0, p0.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp x0, p0.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - uqincp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - uqincp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 - - uqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqincp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw w0, pow2
@@ -7156,10 +7156,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.b, z0.b, #0
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.b, z0.b, z0.b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.d, z0.d, #0
@@ -7175,8 +7175,8 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.d, z31.d, #65280
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.h, z31.h, #65280
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.s, z31.s, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usdot z0.s, z1.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 usdot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 usdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - usmmla z0.s, z1.b, z2.b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpkhi z31.d, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpkhi z31.h, z31.b
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
index 264ad8bccc58e2b..ecf43fd3d9c949a 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
@@ -1301,10 +1301,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 1900
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.74
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 3.0
@@ -1320,8 +1320,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1338,19 +1338,19 @@ add x0, x27, 1
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.3 0.1 2.0 <total>
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [4] Code Region - G05
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 3.3
@@ -1365,9 +1365,9 @@ add x0, x27, 1
# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1383,20 +1383,20 @@ add x0, x27, 1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.2 0.1 2.0 <total>
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [5] Code Region - G06
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 3.3
@@ -1411,9 +1411,9 @@ add x0, x27, 1
# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1429,20 +1429,20 @@ add x0, x27, 1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.2 0.1 2.0 <total>
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [6] Code Region - G07
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 2300
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.53
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 4.3
@@ -1456,10 +1456,10 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1474,21 +1474,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.0 <total>
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [7] Code Region - G08
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.92
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 5.0
@@ -1502,10 +1502,10 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1520,21 +1520,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.0 <total>
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [8] Code Region - G09
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.92
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 5.0
@@ -1548,10 +1548,10 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1566,21 +1566,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.0 <total>
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [9] Code Region - G10
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 608
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.11
+# CHECK-NEXT: uOps Per Cycle: 2.47
# CHECK-NEXT: IPC: 1.64
# CHECK-NEXT: Block RThroughput: 5.0
@@ -1594,10 +1594,10 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,7] D====eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,9] D=====eE-----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1612,21 +1612,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.1 <total>
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 9. 1 6.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.1 <total>
# CHECK: [10] Code Region - G11
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 509
-# CHECK-NEXT: Total uOps: 2400
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.72
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 4.7
@@ -1640,10 +1640,10 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,5] D===eE-----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,7] D====eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,9] D=====eE-----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1658,21 +1658,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: 5. 1 4.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.2 <total>
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 9. 1 6.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.2 <total>
# CHECK: [11] Code Region - G12
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 2400
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.72
+# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 4.7
@@ -1685,11 +1685,11 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE-----R. add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,5] D===eE-----R. add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1703,22 +1703,22 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.0 0.1 2.2 <total>
+# CHECK-NEXT: 5. 1 4.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.2 <total>
# CHECK: [12] Code Region - G13
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 1110
-# CHECK-NEXT: Total uOps: 2600
+# CHECK-NEXT: Total uOps: 1600
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 2.34
+# CHECK-NEXT: uOps Per Cycle: 1.44
# CHECK-NEXT: IPC: 0.90
# CHECK-NEXT: Block RThroughput: 5.0
@@ -1731,10 +1731,10 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D=eeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,3] D==eE----R. . . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE-----R . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,7] .D===eE-----R . . add x0, x27, #1
-# CHECK-NEXT: [0,8] .D=========eeeeeeeeER ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] D===eE-----R . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,7] D====eE-----R . . add x0, x27, #1
+# CHECK-NEXT: [0,8] D==========eeeeeeeeER ld1 { v1.b }[0], [x27], #1
# CHECK-NEXT: [0,9] .D==========eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
@@ -1749,12 +1749,12 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 10.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 4.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
# CHECK-NEXT: 9. 1 11.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 4.2 0.1 2.5 <total>
+# CHECK-NEXT: 1 4.6 0.1 2.5 <total>
# CHECK: [13] Code Region - G14
@@ -1991,10 +1991,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 2400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.71
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 2.5
@@ -2008,7 +2008,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
@@ -2026,21 +2026,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 3.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [19] Code Region - G20
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 2900
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.69
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 3.0
@@ -2052,12 +2052,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2070,23 +2070,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
-# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [20] Code Region - G21
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 2700
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.29
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 2.5
@@ -2099,8 +2099,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
@@ -2117,24 +2117,24 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.0 0.1 3.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [21] Code Region - G22
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 3310
-# CHECK-NEXT: Total uOps: 2600
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.79
+# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.30
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.3
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
@@ -2145,8 +2145,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1
# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
-# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1
# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28
# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1
@@ -2163,24 +2163,24 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
-# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 17.0 0.1 3.0 <total>
+# CHECK-NEXT: 1 17.2 0.1 3.0 <total>
# CHECK: [22] Code Region - G23
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 4003
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.25
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.3
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
@@ -2192,7 +2192,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1
# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1
# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8
# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1
@@ -2210,23 +2210,23 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 17.1 0.1 3.0 <total>
+# CHECK-NEXT: 1 17.2 0.1 3.0 <total>
# CHECK: [23] Code Region - G24
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 2603
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.96
+# CHECK-NEXT: uOps Per Cycle: 0.77
# CHECK-NEXT: IPC: 0.38
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.3
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
@@ -2238,7 +2238,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D=========eE------R . . . add x0, x27, #1
# CHECK-NEXT: [0,4] D================eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28
# CHECK-NEXT: [0,5] D=================eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,6] D=================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: [0,7] .D=================eE------R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D=================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16
# CHECK-NEXT: [0,9] .D==================eE------R add x0, x27, #1
@@ -2256,23 +2256,23 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: 7. 1 18.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
# CHECK-NEXT: 9. 1 19.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 12.9 0.1 3.0 <total>
+# CHECK-NEXT: 1 13.0 0.1 3.0 <total>
# CHECK: [24] Code Region - G25
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.90
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.3
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2284,7 +2284,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8
# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4
# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
@@ -2302,23 +2302,23 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 3.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [25] Code Region - G26
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 2500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.90
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.3
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2330,7 +2330,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
@@ -2348,23 +2348,23 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 3.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [26] Code Region - G27
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 2800
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.49
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 2.8
+# CHECK-NEXT: Block RThroughput: 3.7
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2376,7 +2376,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
@@ -2394,23 +2394,23 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 3.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [27] Code Region - G28
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 3700
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 7.25
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 4.0
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2420,12 +2420,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2438,25 +2438,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [28] Code Region - G29
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 3800
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 7.45
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 4.3
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2465,13 +2465,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2483,26 +2483,26 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.6 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [29] Code Region - G30
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 1910
-# CHECK-NEXT: Total uOps: 3700
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 1.94
+# CHECK-NEXT: uOps Per Cycle: 1.05
# CHECK-NEXT: IPC: 0.52
-# CHECK-NEXT: Block RThroughput: 4.0
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
@@ -2512,12 +2512,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,3] D==eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D=========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D==========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: [0,7] .D==========eE------R . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: [0,9] . D=================eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,9] .D==================eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2530,25 +2530,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 11.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: 9. 1 18.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 6.9 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 9. 1 19.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.4 0.1 3.0 <total>
# CHECK: [30] Code Region - G31
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 4003
-# CHECK-NEXT: Total uOps: 3500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.25
-# CHECK-NEXT: Block RThroughput: 3.8
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
@@ -2558,12 +2558,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1
# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
-# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2576,25 +2576,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
-# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 16.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.2 0.1 3.0 <total>
# CHECK: [31] Code Region - G32
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 4003
-# CHECK-NEXT: Total uOps: 3500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.87
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.25
-# CHECK-NEXT: Block RThroughput: 3.8
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
@@ -2604,12 +2604,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1
# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
-# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2622,25 +2622,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
-# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 16.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.2 0.1 3.0 <total>
# CHECK: [32] Code Region - G33
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 3500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 6.86
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 3.8
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2650,12 +2650,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
-# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2668,25 +2668,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
-# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [33] Code Region - G34
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 3500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 6.86
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 3.8
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2696,12 +2696,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2714,25 +2714,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [34] Code Region - G35
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 3500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 6.86
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
-# CHECK-NEXT: Block RThroughput: 3.8
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
@@ -2742,12 +2742,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2760,25 +2760,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 3.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [35] Code Region - G36
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 611
-# CHECK-NEXT: Total uOps: 4500
+# CHECK-NEXT: Total Cycles: 710
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 7.36
-# CHECK-NEXT: IPC: 1.64
-# CHECK-NEXT: Block RThroughput: 5.3
+# CHECK-NEXT: uOps Per Cycle: 2.82
+# CHECK-NEXT: IPC: 1.41
+# CHECK-NEXT: Block RThroughput: 7.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
@@ -2786,14 +2786,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,7] . D=eE------R .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] D==eE-------R .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,5] D===eE------R .. add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,7] .D===eE------R .. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,9] .D=====eE-------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2804,42 +2804,42 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 1 1.7 0.2 3.2 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 3.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 9. 1 6.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.4 0.2 3.2 <total>
# CHECK: [36] Code Region - G37
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 610
-# CHECK-NEXT: Total uOps: 4800
+# CHECK-NEXT: Total Cycles: 810
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 7.87
-# CHECK-NEXT: IPC: 1.64
-# CHECK-NEXT: Block RThroughput: 6.0
+# CHECK-NEXT: uOps Per Cycle: 2.47
+# CHECK-NEXT: IPC: 1.23
+# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
-# CHECK-NEXT: 012345
+# CHECK-NEXT: 01234567
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeeER . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,3] .D=eE-------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,5] . D=eE-------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . DeeeeeeeeeER. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,7] . D=eE-------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D=eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,9] . D==eE------R add x0, x27, #1
+# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] D=eE------R . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] D==eE-------R . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeeER . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,5] D===eE-------R . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeeER. . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,7] .D===eE-------R. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D======eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,9] .D=======eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2850,27 +2850,27 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: 5. 1 2.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: 9. 1 3.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 1.7 0.2 3.3 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 3.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 5. 1 4.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 7. 1 4.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 3.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.8 0.4 3.3 <total>
# CHECK: [37] Code Region - G38
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 660
-# CHECK-NEXT: Total uOps: 4800
+# CHECK-NEXT: Total Cycles: 809
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 7.27
-# CHECK-NEXT: IPC: 1.52
-# CHECK-NEXT: Block RThroughput: 6.0
+# CHECK-NEXT: uOps Per Cycle: 2.47
+# CHECK-NEXT: IPC: 1.24
+# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
@@ -2878,14 +2878,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,7] . D=eE-------R.. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,3] D==eE-------R .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R .. add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,7] .D===eE-------R.. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,9] .D=====eE-------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2896,25 +2896,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1
-# CHECK-NEXT: 1 1.7 0.2 3.3 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 7. 1 4.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 7.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.4 0.2 3.3 <total>
# CHECK: [38] Code Region - G39
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 4003
-# CHECK-NEXT: Total uOps: 4500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 5.0
@@ -2924,14 +2924,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
-# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2942,25 +2942,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
-# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 15.5 0.1 3.0 <total>
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.2 0.1 3.0 <total>
# CHECK: [39] Code Region - G40
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 4003
-# CHECK-NEXT: Total uOps: 4500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 1.12
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 5.0
@@ -2970,14 +2970,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -2988,25 +2988,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 15.5 0.1 3.0 <total>
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 17.2 0.1 3.0 <total>
# CHECK: [40] Code Region - G41
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 1903
-# CHECK-NEXT: Total uOps: 4500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 2.36
+# CHECK-NEXT: uOps Per Cycle: 1.05
# CHECK-NEXT: IPC: 0.53
# CHECK-NEXT: Block RThroughput: 5.0
@@ -3016,14 +3016,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
# CHECK-NEXT: [0,1] D=eE------R . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] .D=======eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: [0,3] .D========eE------R .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . D=======eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: [0,5] . D========eE------R.. add x0, x27, #1
-# CHECK-NEXT: [0,6] . D=======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
-# CHECK-NEXT: [0,7] . D========eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D=======eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
-# CHECK-NEXT: [0,9] . D========eE------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D========eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] D=========eE------R .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D=========eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,5] D==========eE------R.. add x0, x27, #1
+# CHECK-NEXT: [0,6] D==========eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,7] .D==========eE------R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D==========eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: [0,9] .D===========eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3034,25 +3034,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
-# CHECK-NEXT: 5. 1 9.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
-# CHECK-NEXT: 7. 1 9.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 8.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
-# CHECK-NEXT: 9. 1 9.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 7.1 0.1 3.0 <total>
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 5. 1 11.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 11.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 11.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 9. 1 12.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 8.8 0.1 3.0 <total>
# CHECK: [41] Code Region - G42
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 4500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 8.82
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 5.0
@@ -3062,14 +3062,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
-# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
-# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
-# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
-# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3080,25 +3080,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
-# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
-# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
-# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
-# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 1.5 0.1 3.0 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [42] Code Region - G43
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 510
-# CHECK-NEXT: Total uOps: 4500
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 8.82
+# CHECK-NEXT: uOps Per Cycle: 3.92
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 5.0
@@ -3108,14 +3108,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3126,25 +3126,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 1 1.5 0.1 3.0 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 3.0 <total>
# CHECK: [43] Code Region - G44
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 3300
+# CHECK-NEXT: Total uOps: 1800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 6.50
+# CHECK-NEXT: uOps Per Cycle: 3.54
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 3.7
@@ -3154,14 +3154,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eE------R. add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,5] . D=eE------R add x0, x27, #1
-# CHECK-NEXT: [0,6] . D=eeeeeeE-R ldp s1, s2, [x27], #248
-# CHECK-NEXT: [0,7] . D==eE-----R add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeER ldp d1, d2, [x27], #496
-# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] D==eE------R. add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE------R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeE-R ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,7] D====eE-----R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeER ldp d1, d2, [x27], #496
+# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3172,25 +3172,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248
-# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldp d1, d2, [x27], #496
-# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.1 0.1 2.8 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 1.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.3 0.1 2.8 <total>
# CHECK: [44] Code Region - G45
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 507
-# CHECK-NEXT: Total uOps: 1700
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.35
+# CHECK-NEXT: uOps Per Cycle: 2.96
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 2.5
@@ -3206,8 +3206,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,5] D===eE----R. add x0, x27, #1
# CHECK-NEXT: [0,6] D===eeeeeeER ldp q1, q2, [x27, #992]!
# CHECK-NEXT: [0,7] D====eE----R add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeE-R ldp w1, w2, [x27], #248
-# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeE-R ldp w1, w2, [x27], #248
+# CHECK-NEXT: [0,9] D=====eE---R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3224,21 +3224,21 @@ add x0, x27, 1
# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldp q1, q2, [x27, #992]!
# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 1.0 ldp w1, w2, [x27], #248
-# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.3 0.1 2.0 <total>
+# CHECK-NEXT: 8. 1 5.0 0.0 1.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 2.0 <total>
# CHECK: [45] Code Region - G46
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 507
-# CHECK-NEXT: Total uOps: 1900
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.75
+# CHECK-NEXT: uOps Per Cycle: 2.96
# CHECK-NEXT: IPC: 1.97
-# CHECK-NEXT: Block RThroughput: 3.0
+# CHECK-NEXT: Block RThroughput: 4.3
# CHECK: Timeline view:
# CHECK-NEXT: 01
@@ -3252,8 +3252,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,5] D===eE--R .. add x0, x27, #1
# CHECK-NEXT: [0,6] D===eeeeeER. ldpsw x1, x2, [x27], #248
# CHECK-NEXT: [0,7] D====eE---R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeER ldpsw x1, x2, [x27, #248]!
-# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1
+# CHECK-NEXT: [0,8] D====eeeeeER ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: [0,9] D=====eE---R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3270,9 +3270,9 @@ add x0, x27, 1
# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27], #248
# CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
-# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.3 0.1 1.2 <total>
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.5 0.1 1.2 <total>
# CHECK: [46] Code Region - G47
@@ -3689,10 +3689,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.76
+# CHECK-NEXT: uOps Per Cycle: 3.97
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 3.5
@@ -3705,7 +3705,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeER . st1 { v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,6] D===eeER. st1 { v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeER st1 { v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
@@ -3723,21 +3723,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 0.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
# CHECK: [56] Code Region - G57
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2600
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.16
+# CHECK-NEXT: uOps Per Cycle: 3.97
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 4.0
@@ -3749,8 +3749,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeER . st1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeER. st1 { v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
@@ -3767,22 +3767,22 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.0 0.1 0.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
# CHECK: [57] Code Region - G58
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2600
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.16
+# CHECK-NEXT: uOps Per Cycle: 3.97
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 4.0
@@ -3795,7 +3795,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeER . st1 { v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,6] D===eeER. st1 { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeER st1 { v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
@@ -3813,21 +3813,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 0.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
# CHECK: [58] Code Region - G59
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 703
-# CHECK-NEXT: Total uOps: 3400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.84
+# CHECK-NEXT: uOps Per Cycle: 2.84
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 6.0
@@ -3838,12 +3838,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D===eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,4] D==eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,9] .D=====eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3856,23 +3856,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
-# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
-# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.2 0.0 <total>
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.6 0.2 0.0 <total>
# CHECK: [59] Code Region - G60
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 703
-# CHECK-NEXT: Total uOps: 3600
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.12
+# CHECK-NEXT: uOps Per Cycle: 2.84
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 6.5
@@ -3883,12 +3883,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D===eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] D==eeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] .D=====eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3901,23 +3901,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.2 0.0 <total>
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.6 0.2 0.0 <total>
# CHECK: [60] Code Region - G61
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 703
-# CHECK-NEXT: Total uOps: 3400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.84
+# CHECK-NEXT: uOps Per Cycle: 2.84
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 6.0
@@ -3928,12 +3928,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] D==eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,7] .D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] .D=====eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3946,23 +3946,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.9 0.2 0.0 <total>
+# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.4 0.2 0.0 <total>
# CHECK: [61] Code Region - G62
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 704
-# CHECK-NEXT: Total uOps: 3600
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.11
+# CHECK-NEXT: uOps Per Cycle: 2.84
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 6.5
@@ -3974,12 +3974,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,7] . D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1
+# CHECK-NEXT: [0,4] D===eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,5] D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,7] .D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3992,23 +3992,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.4 0.3 0.0 <total>
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 6.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.3 0.0 <total>
# CHECK: [62] Code Region - G63
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 804
-# CHECK-NEXT: Total uOps: 4200
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.22
+# CHECK-NEXT: uOps Per Cycle: 2.49
# CHECK-NEXT: IPC: 1.24
# CHECK-NEXT: Block RThroughput: 8.0
@@ -4019,13 +4019,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,1] D=eER. .. add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,3] .D=eER .. add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,5] .D===eER .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . D==eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,7] . D===eER .. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D=====eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1
+# CHECK-NEXT: [0,3] D==eER .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D===eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,5] D====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,7] .D====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D======eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,9] .D=======eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4037,24 +4037,24 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 6.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
-# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.3 0.4 0.0 <total>
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 7.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.2 0.4 0.0 <total>
# CHECK: [63] Code Region - G64
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 703
-# CHECK-NEXT: Total uOps: 3800
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.41
+# CHECK-NEXT: uOps Per Cycle: 2.84
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 7.0
@@ -4064,13 +4064,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D==eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . D===eER. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] D===eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] D====eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] .D=====eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4082,24 +4082,24 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.9 0.2 0.0 <total>
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.8 0.2 0.0 <total>
# CHECK: [64] Code Region - G65
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 706
-# CHECK-NEXT: Total uOps: 3200
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.53
+# CHECK-NEXT: uOps Per Cycle: 2.83
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 5.5
@@ -4109,14 +4109,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eER . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D===eeeeER . st1 { v1.b }[0], [x27], #1
-# CHECK-NEXT: [0,5] .D====eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D===eeeeER. st1 { v1.b }[8], [x27], #1
-# CHECK-NEXT: [0,7] . D====eE--R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D====eeeeER st1 { v1.b }[0], [x27], x28
-# CHECK-NEXT: [0,9] . D=====eE--R add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeeeER . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,5] D=====eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D=====eeeeER. st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,7] .D=====eE--R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=====eeeeER st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D======eE--R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4127,15 +4127,15 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st1 { v1.b }[0], [x27], #1
-# CHECK-NEXT: 5. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
-# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
-# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.5 0.3 0.6 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 2.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 5. 1 6.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 7. 1 6.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 6.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.4 0.3 0.6 <total>
# CHECK: [65] Code Region - G66
@@ -4188,10 +4188,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 506
-# CHECK-NEXT: Total uOps: 2200
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.35
+# CHECK-NEXT: uOps Per Cycle: 3.95
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 3.0
@@ -4234,10 +4234,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 506
-# CHECK-NEXT: Total uOps: 2400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.74
+# CHECK-NEXT: uOps Per Cycle: 3.95
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 3.5
@@ -4251,7 +4251,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
@@ -4269,21 +4269,21 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 1.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 1.0 <total>
# CHECK: [68] Code Region - G69
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 506
-# CHECK-NEXT: Total uOps: 2600
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.14
+# CHECK-NEXT: uOps Per Cycle: 3.95
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 4.0
@@ -4296,8 +4296,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
@@ -4314,22 +4314,22 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 1.0 <total>
# CHECK: [69] Code Region - G70
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 506
-# CHECK-NEXT: Total uOps: 2400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.74
+# CHECK-NEXT: uOps Per Cycle: 3.95
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 3.5
@@ -4342,8 +4342,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.b, v2.b }[8], [x27], #2
# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
@@ -4360,12 +4360,12 @@ add x0, x27, 1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.0 0.1 1.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 1.0 <total>
# CHECK: [70] Code Region - G71
@@ -4464,10 +4464,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 406
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total uOps: 1200
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.93
+# CHECK-NEXT: uOps Per Cycle: 2.96
# CHECK-NEXT: IPC: 1.48
# CHECK-NEXT: Block RThroughput: 3.5
@@ -4478,8 +4478,8 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE---R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: [0,5] .D===eE--R add x0, x27, #1
+# CHECK-NEXT: [0,4] D===eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,5] D====eE--R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4492,19 +4492,19 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
-# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.5 0.3 1.2 <total>
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 5. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.8 0.3 1.2 <total>
# CHECK: [73] Code Region - G74
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 707
-# CHECK-NEXT: Total uOps: 3800
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.37
+# CHECK-NEXT: uOps Per Cycle: 2.83
# CHECK-NEXT: IPC: 1.41
# CHECK-NEXT: Block RThroughput: 7.0
@@ -4516,12 +4516,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: [0,5] .D===eE---R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D===eeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: [0,7] . D===eE---R . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D====eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: [0,9] . D=====eE---R add x0, x27, #1
+# CHECK-NEXT: [0,4] D===eeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,5] D====eE---R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,7] .D====eE---R . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=====eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,9] .D======eE---R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4534,23 +4534,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
-# CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
-# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
-# CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.4 0.3 1.4 <total>
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 5. 1 5.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 6.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 7.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 1 4.0 0.3 1.4 <total>
# CHECK: [74] Code Region - G75
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 706
-# CHECK-NEXT: Total uOps: 3400
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.82
+# CHECK-NEXT: uOps Per Cycle: 2.83
# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 6.0
@@ -4562,12 +4562,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeER. . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: [0,5] .D==eE---R. . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] D==eeeeeER. . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,5] D===eE---R. . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,7] .D===eE--R. . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: [0,9] . D====eE---R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,9] .D=====eE---R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4580,23 +4580,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.9 0.2 1.2 <total>
+# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.4 0.2 1.2 <total>
# CHECK: [75] Code Region - G76
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 606
-# CHECK-NEXT: Total uOps: 3200
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.28
+# CHECK-NEXT: uOps Per Cycle: 3.30
# CHECK-NEXT: IPC: 1.65
# CHECK-NEXT: Block RThroughput: 5.5
@@ -4608,12 +4608,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeeeER.. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: [0,5] .D===eE--R.. add x0, x27, #1
-# CHECK-NEXT: [0,6] .D===eeeeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] D===eeeeER.. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,5] D====eE--R.. add x0, x27, #1
+# CHECK-NEXT: [0,6] D====eeeeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
# CHECK-NEXT: [0,7] .D====eE--R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28
-# CHECK-NEXT: [0,9] . D====eE--R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,9] .D=====eE--R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4626,23 +4626,23 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
-# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 5. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.3 0.2 1.1 <total>
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.8 0.2 1.1 <total>
# CHECK: [76] Code Region - G77
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 506
-# CHECK-NEXT: Total uOps: 3000
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.93
+# CHECK-NEXT: uOps Per Cycle: 3.95
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 5.0
@@ -4654,12 +4654,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] D==eeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28
# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12
-# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4672,25 +4672,25 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
-# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 1.0 <total>
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 1.0 <total>
# CHECK: [77] Code Region - G78
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 706
-# CHECK-NEXT: Total uOps: 3600
+# CHECK-NEXT: Total Cycles: 1003
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.10
-# CHECK-NEXT: IPC: 1.42
-# CHECK-NEXT: Block RThroughput: 6.5
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Timeline view:
# CHECK-NEXT: 012
@@ -4700,12 +4700,12 @@ add x0, x27, 1
# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: [0,5] .D==eE--R . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: [0,7] . D==eE--R. . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,5] D===eE--R . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,7] .D===eE--R. . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4718,40 +4718,40 @@ add x0, x27, 1
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
-# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
-# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
-# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.6 0.1 1.2 <total>
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 1.2 <total>
# CHECK: [78] Code Region - G79
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 1205
-# CHECK-NEXT: Total uOps: 5800
+# CHECK-NEXT: Total Cycles: 2399
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.81
-# CHECK-NEXT: IPC: 0.83
-# CHECK-NEXT: Block RThroughput: 12.0
+# CHECK-NEXT: uOps Per Cycle: 0.83
+# CHECK-NEXT: IPC: 0.42
+# CHECK-NEXT: Block RThroughput: 24.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456
-# CHECK-NEXT: Index 0123456789
-
-# CHECK: [0,0] DeeeeeeER . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
-# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: [0,3] .D=eE-----R .. add x0, x27, #1
-# CHECK-NEXT: [0,4] . D=eeeeeeER .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: [0,5] . D==eE----R .. add x0, x27, #1
-# CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: [0,9] . D====eE-----R add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] D=eE----R . . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeER . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] D==eE-----R . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D======eeeeeeER. . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,5] D=======eE----R. . . add x0, x27, #1
+# CHECK-NEXT: [0,6] D============eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,7] .D============eE-----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D============eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,9] .D=============eE-----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4762,42 +4762,42 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
-# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
-# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
-# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
-# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.9 0.4 2.3 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 7.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 5. 1 8.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 13.0 5.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 7. 1 13.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 13.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 9. 1 14.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.6 1.0 2.3 <total>
# CHECK: [79] Code Region - G80
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 1006
-# CHECK-NEXT: Total uOps: 4800
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.77
-# CHECK-NEXT: IPC: 0.99
-# CHECK-NEXT: Block RThroughput: 9.5
+# CHECK-NEXT: uOps Per Cycle: 1.05
+# CHECK-NEXT: IPC: 0.53
+# CHECK-NEXT: Block RThroughput: 19.0
# CHECK: Timeline view:
-# CHECK-NEXT: 012345
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
-# CHECK: [0,0] DeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
-# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeER. . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D===eeeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: [0,5] . D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D====eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1
+# CHECK: [0,0] DeeeeER . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eE--R . . .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeER. . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] D==eE----R. . .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D=======eeeeeeER .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,5] D========eE----R .. add x0, x27, #1
+# CHECK-NEXT: [0,6] D========eeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,7] .D========eE-----R .. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D============eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,9] .D=============eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4808,42 +4808,42 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
-# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
-# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
-# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.4 0.4 1.9 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 8.0 5.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 5. 1 9.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 9.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 7. 1 9.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 13.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 9. 1 14.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 7.0 1.0 1.9 <total>
# CHECK: [80] Code Region - G81
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 807
-# CHECK-NEXT: Total uOps: 5200
+# CHECK-NEXT: Total Cycles: 1508
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 6.44
-# CHECK-NEXT: IPC: 1.24
-# CHECK-NEXT: Block RThroughput: 6.0
+# CHECK-NEXT: uOps Per Cycle: 1.33
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 15.0
# CHECK: Timeline view:
-# CHECK-NEXT: 01234
-# CHECK-NEXT: Index 0123456789
-
-# CHECK: [0,0] DeeeeeeeER. . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
-# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1
-# CHECK-NEXT: [0,2] .DeeeeeeeER . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1
-# CHECK-NEXT: [0,4] . DeeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
-# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D===eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: [0,7] . D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D===eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: [0,9] . D====eE----R add x0, x27, #1
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeER. . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eE-----R. . . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeeER . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] D==eE-----R . . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D============eeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,5] D=============eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D=============eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,7] .D=============eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D=============eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,9] .D==============eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4854,25 +4854,25 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
-# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.6 0.3 2.2 <total>
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 13.0 10.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 5. 1 14.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 14.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 7. 1 14.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 14.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 9. 1 15.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 9.2 1.1 2.2 <total>
# CHECK: [81] Code Region - G82
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 508
-# CHECK-NEXT: Total uOps: 4000
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 7.87
+# CHECK-NEXT: uOps Per Cycle: 3.94
# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 5.0
@@ -4883,13 +4883,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
-# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] . D=eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1
+# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] .D===eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4901,39 +4901,38 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
-# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
-# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
-# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
-# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.3 0.1 2.0 <total>
+# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.2 0.1 2.0 <total>
# CHECK: [82] Code Region - G83
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 506
-# CHECK-NEXT: Total uOps: 2800
+# CHECK-NEXT: Total Cycles: 406
+# CHECK-NEXT: Total uOps: 1600
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 5.53
-# CHECK-NEXT: IPC: 1.58
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: IPC: 1.97
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
-# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
-# CHECK-NEXT: [0,1] D=eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,2] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: [0,3] .D=eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,4] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
-# CHECK-NEXT: [0,5] .D===eE--R. add x0, x27, #1
-# CHECK-NEXT: [0,6] .D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: [0,7] .D====eE--R add x0, x27, #1
+# CHECK: [0,0] DeeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D=eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeER st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] D==eE----R add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eeeeE-R st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,5] D===eE---R add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,7] .D===eE--R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -4945,12 +4944,12 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
-# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
-# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 3.0 0.0 1.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
-# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.9 0.3 1.5 <total>
+# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.9 0.1 1.8 <total>
# CHECK: [83] Code Region - G84
@@ -4990,10 +4989,10 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2200
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 4.37
+# CHECK-NEXT: uOps Per Cycle: 3.97
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 3.5
@@ -5006,7 +5005,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
# CHECK-NEXT: [0,4] D==eeER . stp d1, d2, [x27, #496]!
# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeER. stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,6] D===eeER. stp q1, q2, [x27, #992]!
# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eER. stp w1, w2, [x27], #248
# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
@@ -5024,37 +5023,38 @@ add x0, x27, 1
# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]!
# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 6. 1 4.0 0.0 0.0 stp q1, q2, [x27, #992]!
# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 stp w1, w2, [x27], #248
# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 0.0 <total>
+# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
# CHECK: [85] Code Region - G86
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 1800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.97
-# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: uOps Per Cycle: 2.56
+# CHECK-NEXT: IPC: 1.42
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
-# CHECK-NEXT: Index 012345678
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
-# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
-# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]!
-# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]!
-# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] D===eeER. str b1, [x27], #254
-# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeER str h1, [x27], #254
-# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
+# CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
+# CHECK-NEXT: [0,6] D===eeER . str b1, [x27], #254
+# CHECK-NEXT: [0,7] D=====eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] .D====eeER. str h1, [x27], #254
+# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -5070,36 +5070,37 @@ add x0, x27, 1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]!
# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27], #254
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27], #254
-# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
+# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 5.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 3.7 0.1 0.0 <total>
# CHECK: [86] Code Region - G87
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.97
-# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: uOps Per Cycle: 1.49
+# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
-# CHECK-NEXT: Index 012345678
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeER. . str s1, [x27], #254
-# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
-# CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254
-# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eeER . str q1, [x27], #254
-# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] D===eeER. str b1, [x27, #254]!
-# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeER str h1, [x27, #254]!
-# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
+# CHECK: [0,0] DeeER. . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER . . str d1, [x27], #254
+# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER . . str q1, [x27], #254
+# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eeER . str b1, [x27, #254]!
+# CHECK-NEXT: [0,7] D========eER . add x0, x27, #1
+# CHECK-NEXT: [0,8] D========eeER. str h1, [x27, #254]!
+# CHECK-NEXT: [0,9] D==========eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -5109,42 +5110,43 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254
-# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27], #254
-# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27, #254]!
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27, #254]!
-# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 7. 1 9.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 9.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 9. 1 11.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 6.0 0.1 0.0 <total>
# CHECK: [87] Code Region - G88
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total Cycles: 804
+# CHECK-NEXT: Total uOps: 1600
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.97
-# CHECK-NEXT: IPC: 1.98
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.24
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
-# CHECK-NEXT: Index 012345678
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeER. . str s1, [x27, #254]!
-# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1
-# CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]!
-# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eeER . str q1, [x27, #254]!
-# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
-# CHECK-NEXT: [0,6] D===eER . str w1, [x27], #254
-# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eER. str x1, [x27], #254
-# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
+# CHECK: [0,0] DeeER. .. str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D==eER .. add x0, x27, #1
+# CHECK-NEXT: [0,2] D==eeER .. str d1, [x27, #254]!
+# CHECK-NEXT: [0,3] D====eER .. add x0, x27, #1
+# CHECK-NEXT: [0,4] D====eeER .. str q1, [x27, #254]!
+# CHECK-NEXT: [0,5] D======eER.. add x0, x27, #1
+# CHECK-NEXT: [0,6] D======eER.. str w1, [x27], #254
+# CHECK-NEXT: [0,7] D=======eER. add x0, x27, #1
+# CHECK-NEXT: [0,8] D=======eER. str x1, [x27], #254
+# CHECK-NEXT: [0,9] .D=======eER add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -5154,26 +5156,26 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]!
-# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27, #254]!
-# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str w1, [x27], #254
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str x1, [x27], #254
-# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 7.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 8.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 1 5.5 0.1 0.0 <total>
# CHECK: [88] Code Region - G89
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 2000
+# CHECK-NEXT: Total uOps: 1900
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: uOps Per Cycle: 3.77
# CHECK-NEXT: IPC: 1.98
# CHECK-NEXT: Block RThroughput: 2.5
@@ -5187,7 +5189,7 @@ add x0, x27, 1
# CHECK-NEXT: [0,4] D==eER . strb w1, [x27], #254
# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1
# CHECK-NEXT: [0,6] D===eER . strb w1, [x27, #254]!
-# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1
+# CHECK-NEXT: [0,7] D====eER. add x0, x27, #1
# CHECK-NEXT: [0,8] .D===eER. strh w1, [x27], #254
# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1
@@ -5205,10 +5207,10 @@ add x0, x27, 1
# CHECK-NEXT: 4. 1 3.0 0.0 0.0 strb w1, [x27], #254
# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 6. 1 4.0 0.0 0.0 strb w1, [x27, #254]!
-# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1
+# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1
# CHECK-NEXT: 8. 1 4.0 0.0 0.0 strh w1, [x27], #254
# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.2 0.1 0.0 <total>
+# CHECK-NEXT: 1 3.3 0.1 0.0 <total>
# CHECK: [89] Code Region - G90
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
index 8b1c8a4e4ca55c2..03cd932e2418680 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -7,10 +7,10 @@ cmp x0, #4
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 54
-# CHECK-NEXT: Total uOps: 200
+# CHECK-NEXT: Total uOps: 300
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: uOps Per Cycle: 5.56
# CHECK-NEXT: IPC: 3.70
# CHECK-NEXT: Block RThroughput: 0.5
@@ -24,7 +24,7 @@ cmp x0, #4
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.25 mov x0, x1
-# CHECK-NEXT: 1 1 0.33 cmp x0, #4
+# CHECK-NEXT: 2 1 0.33 cmp x0, #4
# CHECK: Resources:
# CHECK-NEXT: [0.0] - V1UnitB
>From 9941e557f7f8992e27748a572a4eb97995aad840 Mon Sep 17 00:00:00 2001
From: Julien Villette <julien.villette at sipearl.com>
Date: Mon, 20 Jan 2025 11:02:02 +0100
Subject: [PATCH 2/2] [AArch64] Neoverse V1 scheduling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Consider conflicts between SVE and ASIMD instructions.
Quoting the Software Optimization Guide:
Maximum issue bandwidth is sustained using one of the following combinations:
• 2 SVE Uops.
• 4 ASIMD Uops.
• 1 SVE Uop on V0 and 2 ASIMD Uops on VX13.
• 1 SVE Uop on V1 and 2 ASIMD Uops on V02.
---
.../Target/AArch64/AArch64SchedNeoverseV1.td | 430 +-
.../Neoverse/512tvb-sve-instructions.s | 6 +-
.../llvm-mca/AArch64/Neoverse/V1-forwarding.s | 134 +-
.../AArch64/Neoverse/V1-scheduling-info.s | 5935 +++++++++--------
.../AArch64/Neoverse/V1-sve-instructions.s | 5490 +++++++--------
5 files changed, 6025 insertions(+), 5970 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 9f2f11bafe79fe9..99ca28bc4151dad 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -66,6 +66,11 @@ def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1,
def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units
def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units
def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units
+// Groups that select V0 + V2 or V1 + V3 when 2 micro-operations are issued
+def V1UnitSVE01 : ProcResGroup<[V1UnitV0, V1UnitV1, // FP/ASIMD 0,2/1,3 units
+ V1UnitV2, V1UnitV3]>;
+def V1UnitSVE0 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0,2 units
+def V1UnitSVE1 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1,3 units
// Define commonly used read types.
@@ -144,39 +149,63 @@ def V1Write_4c2_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4;
def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
def V1Write_6c3_1V : SchedWriteRes<[V1UnitV]> { let Latency = 6;
let ReleaseAtCycles = [3]; }
-def V1Write_12c2_1V : SchedWriteRes<[V1UnitV1]> { let Latency = 12;
- let ReleaseAtCycles = [2]; }
-def V1Write_14c2_1V : SchedWriteRes<[V1UnitV1]> { let Latency = 14;
- let ReleaseAtCycles = [2]; }
+def V1Write_12c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+def V1Write_14c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 14;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; }
+def V1Write_2c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; }
+def V1Write_3c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 3;
+ let NumMicroOps = 2; }
def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
-def V1Write_5c2_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 5;
- let ReleaseAtCycles = [2]; }
-def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; }
-def V1Write_6c4_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6;
- let ReleaseAtCycles = [4]; }
-def V1Write_10c9_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
- let ReleaseAtCycles = [9]; }
-def V1Write_11c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 11;
- let ReleaseAtCycles = [10]; }
-def V1Write_12c11_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
- let ReleaseAtCycles = [11]; }
-def V1Write_13c12_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
- let ReleaseAtCycles = [12]; }
-def V1Write_15c14_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
- let ReleaseAtCycles = [14]; }
-def V1Write_16c14_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
- let ReleaseAtCycles = [14]; }
-def V1Write_19c18_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 19;
- let ReleaseAtCycles = [18]; }
-def V1Write_20c20_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
- let ReleaseAtCycles = [20]; }
+def V1Write_4c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_5c4_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 5;
+ let ReleaseAtCycles = [4];
+ let NumMicroOps = 2; }
+def V1Write_6c_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def V1Write_6c4_1SVE0 : SchedWriteRes<[V1UnitSVE0,V1UnitSVE0]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4,4]; }
+def V1Write_10c18_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [18]; }
+def V1Write_11c20_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [20]; }
+def V1Write_12c22_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [22]; }
+def V1Write_13c24_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 13;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [24]; }
+def V1Write_15c28_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 15;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [28]; }
+def V1Write_16c28_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 16;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [28]; }
+def V1Write_19c36_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 19;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [36]; }
+def V1Write_20c40_1SVE0 : SchedWriteRes<[V1UnitSVE0]> { let Latency = 20;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [40]; }
def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
+def V1Write_2c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 2;
+ let NumMicroOps = 2; }
def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
-def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
+def V1Write_3c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def V1Write_4c_1SVE01 : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 4;
+ let NumMicroOps = 2; }
def V1Write_4c2_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
let ReleaseAtCycles = [2]; }
def V1Write_4c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4;
@@ -185,16 +214,21 @@ def V1Write_6c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
let ReleaseAtCycles = [3]; }
def V1Write_6c5_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 6;
let ReleaseAtCycles = [5]; }
-def V1Write_8c3_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 8;
- let ReleaseAtCycles = [3]; }
-def V1Write_9c4_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 9;
- let ReleaseAtCycles = [4]; }
-def V1Write_12c4_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 12;
- let ReleaseAtCycles = [4]; }
-def V1Write_13c6_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 13;
- let ReleaseAtCycles = [6]; }
-def V1Write_11c5_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 11;
- let ReleaseAtCycles = [5]; }
+def V1Write_8c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [6]; }
+def V1Write_9c8_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 9;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [8]; }
+def V1Write_12c8_1SVE01: SchedWriteRes<[V1UnitSVE01]> { let Latency = 12;
+ let ReleaseAtCycles = [8];
+ let NumMicroOps = 2; }
+def V1Write_13c6_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 13;
+ let ReleaseAtCycles = [12];
+ let NumMicroOps = 2; }
+def V1Write_11c10_1SVE01 : SchedWriteRes<[V1UnitSVE01]> { let Latency = 11;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [10]; }
def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
def V1Write_4c2_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4;
@@ -224,12 +258,18 @@ def V1Write_16c8_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
def V1Write_16c15_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
let ReleaseAtCycles = [15]; }
def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; }
-def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; }
-def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; }
-def V1Write_8c2_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 8;
- let ReleaseAtCycles = [2]; }
-def V1Write_10c2_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 10;
- let ReleaseAtCycles = [2]; }
+def V1Write_2c_1SVE1 : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def V1Write_3c_1SVE1 : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def V1Write_4c_1SVE1 : SchedWriteRes<[V1UnitSVE1,V1UnitSVE1]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def V1Write_8c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 8;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
+def V1Write_10c4_1SVE1 : SchedWriteRes<[V1UnitSVE1]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
def V1Write_4c2_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4;
@@ -275,9 +315,9 @@ def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]> {
let NumMicroOps = 2;
}
-def V1Write_9c2_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+def V1Write_9c2_1L01_1SVE01 : SchedWriteRes<[V1UnitL01, V1UnitSVE01]> {
let Latency = 9;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
let ReleaseAtCycles = [2,2];
}
@@ -405,52 +445,52 @@ def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]> {
let NumMicroOps = 2;
}
-def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]> {
+def V1Write_4c_1M0_1SVE0 : SchedWriteRes<[V1UnitM0, V1UnitSVE0, V1UnitSVE0]> {
let Latency = 4;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
-def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]> {
+def V1Write_7c_1M0_1SVE0 : SchedWriteRes<[V1UnitM0, V1UnitSVE0, V1UnitSVE0]> {
let Latency = 7;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
-def V1Write_8c2_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]> {
+def V1Write_8c2_1M0_1SVE0 : SchedWriteRes<[V1UnitM0, V1UnitSVE0]> {
let Latency = 8;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
let ReleaseAtCycles = [2,2];
}
-def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]> {
+def V1Write_5c_1M0_1SVE01 : SchedWriteRes<[V1UnitM0, V1UnitSVE01, V1UnitSVE01]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
-def V1Write_7c2_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]> {
+def V1Write_7c2_1M0_1SVE01 : SchedWriteRes<[V1UnitM0, V1UnitSVE01]> {
let Latency = 7;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
let ReleaseAtCycles = [2,2];
}
-def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]> {
+def V1Write_6c_1M0_1SVE1 : SchedWriteRes<[V1UnitM0, V1UnitSVE1, V1UnitSVE1]> {
let Latency = 6;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
-def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]> {
+def V1Write_9c_1M0_1SVE1 : SchedWriteRes<[V1UnitM0, V1UnitSVE1, V1UnitSVE1]> {
let Latency = 9;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
-def V1Write_4c2_1V0 : SchedWriteRes<[V1UnitV0]> {
+def V1Write_4c2_1SVE0 : SchedWriteRes<[V1UnitSVE0, V1UnitSVE0]> {
let Latency = 4;
- let NumMicroOps = 1;
- let ReleaseAtCycles = [2];
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2,2];
}
-def V1Write_8c2_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+def V1Write_8c2_2L01_2SVE01 : SchedWriteRes<[V1UnitL01, V1UnitSVE01]> {
let Latency = 8;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
let ReleaseAtCycles = [2, 2];
}
@@ -478,15 +518,15 @@ def V1Write_6c2_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
let ReleaseAtCycles = [2,2];
}
-def V1Write_11c6_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+def V1Write_11c6_1L01_1SVE01 : SchedWriteRes<[V1UnitL01, V1UnitSVE01]> {
let Latency = 11;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
let ReleaseAtCycles = [6,6];
}
-def V1Write_12c8_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]> {
+def V1Write_12c8_1L01_1SVE01 : SchedWriteRes<[V1UnitL01, V1UnitSVE01]> {
let Latency = 12;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
let ReleaseAtCycles = [8,8];
}
@@ -527,21 +567,21 @@ def V1Write_11c18_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]> {
let ReleaseAtCycles = [18,18,18];
}
-def V1Write_10c2_1L01_1V01_1S : SchedWriteRes<[V1UnitL01, V1UnitV01, V1UnitS]> {
+def V1Write_10c2_1L01_1SVE01_1S : SchedWriteRes<[V1UnitL01, V1UnitSVE01, V1UnitS]> {
let Latency = 10;
- let NumMicroOps = 3;
+ let NumMicroOps = 4;
let ReleaseAtCycles = [2,2,2];
}
-def V1Write_13c6_1L01_1S_1V01 : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV01]> {
+def V1Write_13c6_1L01_1S_1SVE01 : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitSVE01]> {
let Latency = 13;
- let NumMicroOps = 3;
+ let NumMicroOps = 4;
let ReleaseAtCycles = [6,6,6];
}
-def V1Write_13c8_1L01_1S_1V01 : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV01]> {
+def V1Write_13c8_1L01_1S_1SVE01 : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitSVE01]> {
let Latency = 13;
- let NumMicroOps = 3;
+ let NumMicroOps = 4;
let ReleaseAtCycles = [8,8,8];
}
@@ -600,36 +640,48 @@ def V1Rd_BFMLA : SchedReadAdvance<2, [V1Wr_BFMLA]>;
def V1Wr_CRC : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
def V1Rd_CRC : SchedReadAdvance<1, [V1Wr_CRC]>;
-def V1Wr_ZDOTB : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
+def V1Wr_ZDOTB : SchedWriteRes<[V1UnitSVE01]> { let Latency = 3;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2]; }
def V1Rd_ZDOTB : SchedReadAdvance<2, [V1Wr_ZDOTB]>;
def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; let ReleaseAtCycles = [2]; }
def V1Rd_ZUDOTB : SchedReadAdvance<2, [V1Wr_ZUDOTB]>;
-def V1Wr_ZDOTH : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
+def V1Wr_ZDOTH : SchedWriteRes<[V1UnitSVE0]> { let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2]; }
def V1Rd_ZDOTH : SchedReadAdvance<3, [V1Wr_ZDOTH]>;
def V1Wr_ZMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Rd_ZMMA : SchedReadAdvance<2, [V1Wr_ZMMA]>;
-def V1Wr_ZMABHS : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
-def V1Rd_ZMABHS : SchedReadAdvance<2, [V1Wr_ZMABHS]>;
+def V1Wr_ZMABHS : SchedWriteRes<[V1UnitSVE0]> { let Latency = 4;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [2]; }
+def V1Rd_ZMABHS : SchedReadAdvance<2, [V1Wr_ZMABHS]>;
-let Latency = 5, NumMicroOps = 1 in
-def V1Wr_ZMAD : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
+def V1Wr_ZMAD : SchedWriteRes<[V1UnitSVE0]> { let Latency = 5;
+ let NumMicroOps = 2;
+ let ReleaseAtCycles = [4]; }
def V1Rd_ZMAD : SchedReadAdvance<3, [V1Wr_ZMAD]>;
-def V1Wr_ZFCMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Wr_ZFCMA : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 5;
+ let NumMicroOps = 2; }
def V1Rd_ZFCMA : SchedReadAdvance<3, [V1Wr_ZFCMA]>;
-def V1Wr_ZFMA : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
+def V1Wr_ZFMA : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 4;
+ let NumMicroOps = 2; }
def V1Rd_ZFMA : SchedReadAdvance<2, [V1Wr_ZFMA]>;
-def V1Wr_ZBFDOT : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
+def V1Wr_ZBFDOT : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 4;
+ let NumMicroOps = 2; }
def V1Rd_ZBFDOT : SchedReadAdvance<2, [V1Wr_ZBFDOT]>;
-def V1Wr_ZBFMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Wr_ZBFMMA : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 5;
+ let NumMicroOps = 2; }
def V1Rd_ZBFMMA : SchedReadAdvance<2, [V1Wr_ZBFMMA]>;
-def V1Wr_ZBFMAL : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Wr_ZBFMAL : SchedWriteRes<[V1UnitSVE01,V1UnitSVE01]> { let Latency = 5;
+ let NumMicroOps = 2; }
def V1Rd_ZBFMAL : SchedReadAdvance<3, [V1Wr_ZBFMAL]>;
// Miscellaneous Instructions
@@ -1577,7 +1629,7 @@ def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$"
"^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;
// Predicate counting vector, active predicate
-def : InstRW<[V1Write_7c2_1M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
+def : InstRW<[V1Write_7c2_1M0_1SVE01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
// Predicate logical
def : InstRW<[V1Write_1c_1M0],
@@ -1616,7 +1668,7 @@ def : InstRW<[V1Write_3c2_1M0], (instregex "^PTRUES_[BHSD]$")>;
// Arithmetic, basic
// Logical
-def : InstRW<[V1Write_2c_1V01],
+def : InstRW<[V1Write_2c_1SVE01],
(instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
"^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]",
"^ADR_[SU]XTW_ZZZ_D_[0123]$",
@@ -1631,7 +1683,7 @@ def : InstRW<[V1Write_2c_1V01],
"^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>;
// Arithmetic, shift
-def : InstRW<[V1Write_2c_1V1],
+def : InstRW<[V1Write_2c_1SVE1],
(instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
"^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
"^(ASR|LSL|LSR)_ZZI_[BHSD]",
@@ -1639,51 +1691,51 @@ def : InstRW<[V1Write_2c_1V1],
"^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
// Arithmetic, shift right for divide
-def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
+def : InstRW<[V1Write_4c_1SVE1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
// Count/reverse bits
-def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
// Broadcast logical bitmask immediate to vector
-def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>;
+def : InstRW<[V1Write_2c_1SVE01], (instrs DUPM_ZI)>;
// Compare and set flags
-def : InstRW<[V1Write_4c_1M0_1V0],
+def : InstRW<[V1Write_4c_1M0_1SVE0],
(instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
"^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
// Conditional extract operations, scalar form
-def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+def : InstRW<[V1Write_9c_1M0_1SVE1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
// Conditional extract operations, SIMD&FP scalar and vector forms
-def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
+def : InstRW<[V1Write_3c_1SVE1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
"^COMPACT_ZPZ_[SD]$",
"^SPLICE_ZPZZ?_[BHSD]$")>;
// Convert to floating point, 64b to float or convert to double
-def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]")>;
+def : InstRW<[V1Write_3c_1SVE0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]")>;
// Convert to floating point, 32b to single or half
-def : InstRW<[V1Write_4c2_1V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]",
+def : InstRW<[V1Write_4c2_1SVE0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]",
"^[SU]CVTF_ZPmZ_StoD")>;
// Convert to floating point, 16b to half
-def : InstRW<[V1Write_6c4_1V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
+def : InstRW<[V1Write_6c4_1SVE0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
// Copy, scalar
-def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
+def : InstRW<[V1Write_5c_1M0_1SVE01], (instregex "^CPY_ZPmR_[BHSD]$")>;
// Copy, scalar SIMD&FP or imm
-def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
// Divides, 32 bit
-def : InstRW<[V1Write_12c11_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
- "^[SU]DIV_ZPZZ_S")>;
+def : InstRW<[V1Write_12c22_1SVE0], (instregex "^[SU]DIVR?_ZPmZ_S",
+ "^[SU]DIV_ZPZZ_S")>;
// Divides, 64 bit
-def : InstRW<[V1Write_20c20_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
- "^[SU]DIV_ZPZZ_D")>;
+def : InstRW<[V1Write_20c40_1SVE0], (instregex "^[SU]DIVR?_ZPmZ_D",
+ "^[SU]DIV_ZPZZ_D")>;
// Dot product, 8 bit
def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>;
@@ -1696,58 +1748,58 @@ def : InstRW<[V1Wr_ZUDOTB, V1Rd_ZUDOTB],
def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>;
// Duplicate, immediate and indexed form
-def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^DUP_ZI_[BHSD]$",
"^DUP_ZZI_[BHSDQ]$")>;
// Duplicate, scalar form
def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
// Extend, sign or zero
-def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]",
- "^[SU]XTH_ZPmZ_[SD]",
- "^[SU]XTW_ZPmZ_[D]")>;
+def : InstRW<[V1Write_2c_1SVE1], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
// Extract
-def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>;
+def : InstRW<[V1Write_2c_1SVE01], (instrs EXT_ZZI)>;
// Extract/insert operation, SIMD and FP scalar form
-def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
+def : InstRW<[V1Write_3c_1SVE1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
"^INSR_ZV_[BHSD]$")>;
// Extract/insert operation, scalar
-def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$",
+def : InstRW<[V1Write_6c_1M0_1SVE1], (instregex "^LAST[AB]_RPZ_[BHSD]$",
"^INSR_ZR_[BHSD]$")>;
// Horizontal operations, B, H, S form, imm, imm
-def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>;
+def : InstRW<[V1Write_4c_1SVE0], (instregex "^INDEX_II_[BHS]$")>;
// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar
-def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+def : InstRW<[V1Write_7c_1M0_1SVE0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
// Horizontal operations, D form, imm, imm
-def : InstRW<[V1Write_5c2_1V0], (instrs INDEX_II_D)>;
+def : InstRW<[V1Write_5c4_1SVE0], (instrs INDEX_II_D)>;
// Horizontal operations, D form, scalar, imm / scalar / imm, scalar
-def : InstRW<[V1Write_8c2_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+def : InstRW<[V1Write_8c2_1M0_1SVE0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
// Move prefix
-def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
- "^MOVPRFX_ZZ$")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
+ "^MOVPRFX_ZZ$")>;
// Matrix multiply-accumulate
def : InstRW<[V1Wr_ZMMA, V1Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
// Multiply, B, H, S element size
-def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
- "^MUL_ZPZZ_[BHS]",
- "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
- "^[SU]MULH_ZPZZ_[BHS]")>;
+def : InstRW<[V1Write_4c_1SVE0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+ "^MUL_ZPZZ_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+ "^[SU]MULH_ZPZZ_[BHS]")>;
// Multiply, D element size
-def : InstRW<[V1Write_5c2_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
- "^MUL_ZPZZ_D",
- "^[SU]MULH_(ZPmZ|ZZZ)_D",
- "^[SU]MULH_ZPZZ_D")>;
+def : InstRW<[V1Write_5c4_1SVE0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+ "^MUL_ZPZZ_D",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D",
+ "^[SU]MULH_ZPZZ_D")>;
// Multiply accumulate, D element size
def : InstRW<[V1Wr_ZMAD, V1Rd_ZMAD],
@@ -1760,32 +1812,32 @@ def : InstRW<[V1Wr_ZMAD, ReadDefault, V1Rd_ZMAD],
def : InstRW<[V1Wr_ZMABHS, ReadDefault, V1Rd_ZMABHS], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
// Predicate counting vector
-def : InstRW<[V1Write_2c_1V01], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
// Reduction, arithmetic, B form
-def : InstRW<[V1Write_14c2_1V],
+def : InstRW<[V1Write_14c4_1SVE1],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
// Reduction, arithmetic, H form
-def : InstRW<[V1Write_12c2_1V],
+def : InstRW<[V1Write_12c4_1SVE1],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
// Reduction, arithmetic, S form
-def : InstRW<[V1Write_10c2_1V1],
+def : InstRW<[V1Write_10c4_1SVE1],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
// Reduction, arithmetic, D form
-def : InstRW<[V1Write_8c2_1V1],
+def : InstRW<[V1Write_8c4_1SVE1],
(instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
// Reduction, logical
-def : InstRW<[V1Write_12c4_1V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
+def : InstRW<[V1Write_12c8_1SVE01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
// Reverse, vector
-def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
- "^REVB_ZPmZ_[HSD]$",
- "^REVH_ZPmZ_[SD]$",
- "^REVW_ZPmZ_D$")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^REV_ZZ_[BHSD]$",
+ "^REVB_ZPmZ_[HSD]$",
+ "^REVH_ZPmZ_[SD]$",
+ "^REVW_ZPmZ_D$")>;
// Select, vector form
// Table lookup
@@ -1793,45 +1845,45 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
// Transpose, vector form
// Unpack and extend
// Zip/unzip
-def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$",
- "^TB[LX]_ZZZ_[BHSD]$",
- "^TRN[12]_ZZZ_[BHSDQ]$",
- "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
- "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^SEL_ZPZZ_[BHSD]$",
+ "^TB[LX]_ZZZ_[BHSD]$",
+ "^TRN[12]_ZZZ_[BHSDQ]$",
+ "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
+ "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
// SVE floating-point instructions
// -----------------------------------------------------------------------------
// Floating point absolute value/difference
-def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]",
- "^FABD_ZPZZ_[HSD]",
- "^FABS_ZPmZ_[HSD]")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+ "^FABD_ZPZZ_[HSD]",
+ "^FABS_ZPmZ_[HSD]")>;
// Floating point arithmetic
-def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
- "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
- "^FADDP_ZPmZZ_[HSD]",
- "^FNEG_ZPmZ_[HSD]",
- "^FSUBR_ZPm[IZ]_[HSD]",
- "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+ "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_ZPm[IZ]_[HSD]",
+ "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
// Floating point associative add, F16
-def : InstRW<[V1Write_19c18_1V0], (instrs FADDA_VPZ_H)>;
+def : InstRW<[V1Write_19c36_1SVE0], (instrs FADDA_VPZ_H)>;
// Floating point associative add, F32
-def : InstRW<[V1Write_11c10_1V0], (instrs FADDA_VPZ_S)>;
+def : InstRW<[V1Write_11c20_1SVE0], (instrs FADDA_VPZ_S)>;
// Floating point associative add, F64
-def : InstRW<[V1Write_8c3_1V01], (instrs FADDA_VPZ_D)>;
+def : InstRW<[V1Write_8c6_1SVE01], (instrs FADDA_VPZ_D)>;
// Floating point compare
-def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
+def : InstRW<[V1Write_2c_1SVE0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
"^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
"^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;
// Floating point complex add
-def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+def : InstRW<[V1Write_3c_1SVE01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
// Floating point complex multiply add
def : InstRW<[V1Wr_ZFCMA, ReadDefault, V1Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
@@ -1839,36 +1891,36 @@ def : InstRW<[V1Wr_ZFCMA, V1Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
// Floating point convert to integer, F32
-def : InstRW<[V1Write_4c2_1V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
- "^FCVTZ[SU]_ZPmZ_(StoS|StoD)")>;
+def : InstRW<[V1Write_4c2_1SVE0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTZ[SU]_ZPmZ_(StoS|StoD)")>;
// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
// Floating point convert to integer, F64
-def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+def : InstRW<[V1Write_3c_1SVE0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
"^FCVTZ[SU]_ZPmZ_(DtoS|DtoD)")>;
// Floating point convert to integer, F16
-def : InstRW<[V1Write_6c4_1V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoH|HtoS|HtoD)")>;
+def : InstRW<[V1Write_6c4_1SVE0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoH|HtoS|HtoD)")>;
// Floating point copy
-def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
- "^FDUP_ZI_[HSD]$")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^FCPY_ZPmI_[HSD]$",
+ "^FDUP_ZI_[HSD]$")>;
// Floating point divide, F16
-def : InstRW<[V1Write_13c12_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
+def : InstRW<[V1Write_13c24_1SVE0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
// Floating point divide, F32
-def : InstRW<[V1Write_10c9_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
+def : InstRW<[V1Write_10c18_1SVE0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
// Floating point divide, F64
-def : InstRW<[V1Write_15c14_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
+def : InstRW<[V1Write_15c28_1SVE0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
// Floating point min/max
-def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
- "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
+def : InstRW<[V1Write_2c_1SVE01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+ "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
// Floating point multiply
-def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+def : InstRW<[V1Write_3c_1SVE01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
"^FMULX_ZPZZ_[HSD]",
"^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
"^FMUL_ZPZ[IZ]_[HSD]")>;
@@ -1882,58 +1934,58 @@ def : InstRW<[V1Wr_ZFMA, V1Rd_ZFMA],
"^FN?ML[AS]_ZPZZZ_[HSD]")>;
// Floating point reciprocal step
-def : InstRW<[V1Write_4c_1V01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
+def : InstRW<[V1Write_4c_1SVE01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
// Floating point reciprocal estimate, F16
-def : InstRW<[V1Write_6c_1V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
+def : InstRW<[V1Write_6c_1SVE0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
// Floating point reciprocal estimate, F32
-def : InstRW<[V1Write_4c_1V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
+def : InstRW<[V1Write_4c_1SVE0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
// Floating point reciprocal estimate, F64
-def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
+def : InstRW<[V1Write_3c_1SVE0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
// Floating point reciprocal exponent
-def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>;
+def : InstRW<[V1Write_3c_1SVE0], (instregex "^FRECPX_ZPmZ_[HSD]")>;
// Floating point reduction, F16
-def : InstRW<[V1Write_13c6_1V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
+def : InstRW<[V1Write_13c6_1SVE01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
// Floating point reduction, F32
-def : InstRW<[V1Write_11c5_1V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
+def : InstRW<[V1Write_11c10_1SVE01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
// Floating point reduction, F64
-def : InstRW<[V1Write_9c4_1V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
+def : InstRW<[V1Write_9c8_1SVE01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
// Floating point round to integral, F16
-def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
+def : InstRW<[V1Write_6c_1SVE0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
// Floating point round to integral, F32
-def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
+def : InstRW<[V1Write_4c_1SVE0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
// Floating point round to integral, F64
-def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
+def : InstRW<[V1Write_3c_1SVE0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V1Write_13c12_1V0], (instregex "^FSQRT_ZPmZ_H")>;
+def : InstRW<[V1Write_13c24_1SVE0], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V1Write_10c9_1V0], (instregex "^FSQRT_ZPmZ_S")>;
+def : InstRW<[V1Write_10c18_1SVE0], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V1Write_16c14_1V0], (instregex "^FSQRT_ZPmZ_D")>;
+def : InstRW<[V1Write_16c28_1SVE0], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric
-def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
- "^FTMAD_ZZI_[HSD]$",
- "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+def : InstRW<[V1Write_3c_1SVE01], (instregex "^FEXPA_ZZ_[HSD]$",
+ "^FTMAD_ZZI_[HSD]$",
+ "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------
// Convert, F32 to BF16
-def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+def : InstRW<[V1Write_4c_1SVE0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
// Dot product
def : InstRW<[V1Wr_ZBFDOT, V1Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
@@ -2000,23 +2052,23 @@ def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$",
"^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
-def : InstRW<[V1Write_8c2_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
+def : InstRW<[V1Write_8c2_2L01_2SVE01], (instregex "^LD2[BHWD]_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + scalar
-def : InstRW<[V1Write_10c2_1L01_1V01_1S], (instrs LD2H)>;
-def : InstRW<[V1Write_9c2_1L01_1V01], (instregex "^LD2[BWD]$")>;
+def : InstRW<[V1Write_10c2_1L01_1SVE01_1S], (instrs LD2H)>;
+def : InstRW<[V1Write_9c2_1L01_1SVE01], (instregex "^LD2[BWD]$")>;
// Contiguous Load three structures to three vectors, scalar + imm
-def : InstRW<[V1Write_11c6_1L01_1V01], (instregex "^LD3[BHWD]_IMM$")>;
+def : InstRW<[V1Write_11c6_1L01_1SVE01], (instregex "^LD3[BHWD]_IMM$")>;
// Contiguous Load three structures to three vectors, scalar + scalar
-def : InstRW<[V1Write_13c6_1L01_1S_1V01], (instregex "^LD3[BHWD]$")>;
+def : InstRW<[V1Write_13c6_1L01_1S_1SVE01], (instregex "^LD3[BHWD]$")>;
// Contiguous Load four structures to four vectors, scalar + imm
-def : InstRW<[V1Write_12c8_1L01_1V01], (instregex "^LD4[BHWD]_IMM$")>;
+def : InstRW<[V1Write_12c8_1L01_1SVE01], (instregex "^LD4[BHWD]_IMM$")>;
// Contiguous Load four structures to four vectors, scalar + scalar
-def : InstRW<[V1Write_13c8_1L01_1S_1V01], (instregex "^LD4[BHWD]$")>;
+def : InstRW<[V1Write_13c8_1L01_1S_1SVE01], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
def : InstRW<[V1Write_11c12_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s
index e6d4d67cca780d5..5a42308a937ade8 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/512tvb-sve-instructions.s
@@ -14,7 +14,7 @@ abs z0.b, p0/m, z0.b
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 2 0.50 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: 2 2 0.50 abs z0.b, p0/m, z0.b
# CHECK: Resources:
# CHECK-NEXT: [0.0] - V1UnitB
@@ -38,8 +38,8 @@ abs z0.b, p0/m, z0.b
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - -
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions:
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z0.b, p0/m, z0.b
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s
index 4d099f585143709..1905ee38f99c8a7 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s
@@ -893,10 +893,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.67
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 2.0
@@ -911,7 +911,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b
# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b
-# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b
+# CHECK-NEXT: [1,3] .D====================eeeER sdot z0.s, z0.b, z1.b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -923,18 +923,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b
# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b
-# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b
-# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 15.5 0.0 0.0 sdot z0.s, z0.b, z1.b
+# CHECK-NEXT: 2 11.9 0.1 0.0 <total>
# CHECK: [18] Code Region - Z sudot
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.67
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 2.0
@@ -949,7 +949,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b[1]
-# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b[1]
+# CHECK-NEXT: [1,3] .D====================eeeER sdot z0.s, z0.b, z1.b[1]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -961,18 +961,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
-# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b[1]
-# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 15.5 0.0 0.0 sdot z0.s, z0.b, z1.b[1]
+# CHECK-NEXT: 2 11.9 0.1 0.0 <total>
# CHECK: [19] Code Region - Z sdot.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: uOps Per Cycle: 0.57
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 5.0
@@ -987,7 +987,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . sdot z0.d, z1.h, z2.h
# CHECK-NEXT: [1,2] D====================eeeeER . sdot z0.d, z1.h, z2.h
-# CHECK-NEXT: [1,3] D========================eeeeER sdot z0.d, z0.h, z1.h
+# CHECK-NEXT: [1,3] .D=======================eeeeER sdot z0.d, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -999,18 +999,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 sdot z0.d, z1.h, z2.h
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sdot z0.d, z1.h, z2.h
-# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sdot z0.d, z0.h, z1.h
-# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 17.5 0.0 0.0 sdot z0.d, z0.h, z1.h
+# CHECK-NEXT: 2 13.1 0.1 0.0 <total>
# CHECK: [20] Code Region - Z smmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: uOps Per Cycle: 0.42
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1045,10 +1045,10 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1703
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.47
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 8.0
@@ -1063,7 +1063,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeER. .. mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D========================eeeeeER .. mla z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] D=============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] .D============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1075,18 +1075,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 14.5 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 16.5 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 21.5 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 21.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 15.4 0.1 0.0 <total>
# CHECK: [22] Code Region - Z mad.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1703
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.47
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 8.0
@@ -1101,7 +1101,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeER. .. mad z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D========================eeeeeER .. mad z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] D=============================eeeeeER mad z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] .D============================eeeeeER mad z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1113,18 +1113,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 14.5 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 16.5 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 21.5 0.0 0.0 mad z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 21.0 0.0 0.0 mad z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 15.4 0.1 0.0 <total>
# CHECK: [23] Code Region - Z msb.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1703
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.47
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 8.0
@@ -1139,7 +1139,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeER. .. msb z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D========================eeeeeER .. msb z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] D=============================eeeeeER msb z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] .D============================eeeeeER msb z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1151,18 +1151,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1. 2 14.5 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 16.5 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 21.5 0.0 0.0 msb z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 21.0 0.0 0.0 msb z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 15.4 0.1 0.0 <total>
# CHECK: [24] Code Region - Z fcmla ZPmZZ
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: uOps Per Cycle: 0.53
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1177,7 +1177,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
-# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.d, p0/m, z0.d, z1.d, #90
+# CHECK-NEXT: [1,3] .D========================eeeeeER fcmla z0.d, p0/m, z0.d, z1.d, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1189,18 +1189,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
-# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.d, p0/m, z0.d, z1.d, #90
-# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 fcmla z0.d, p0/m, z0.d, z1.d, #90
+# CHECK-NEXT: 2 12.9 0.1 0.0 <total>
# CHECK: [25] Code Region - Z fcmla ZZZI
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: uOps Per Cycle: 0.53
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1215,7 +1215,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
-# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.s, z0.s, z1.s[1], #90
+# CHECK-NEXT: [1,3] .D========================eeeeeER fcmla z0.s, z0.s, z1.s[1], #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1227,18 +1227,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
-# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.s, z0.s, z1.s[1], #90
-# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 fcmla z0.s, z0.s, z1.s[1], #90
+# CHECK-NEXT: 2 12.9 0.1 0.0 <total>
# CHECK: [26] Code Region - Z fmla ZPmZZ
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.61
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1253,7 +1253,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,3] .D=====================eeeeER fmla z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1265,18 +1265,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
-# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, p0/m, z0.d, z1.d
-# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 fmla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 11.6 0.1 0.0 <total>
# CHECK: [27] Code Region - Z fmla ZZZI
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.61
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1291,7 +1291,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, z1.d, z2.d[1]
-# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, z0.d, z1.d[1]
+# CHECK-NEXT: [1,3] .D=====================eeeeER fmla z0.d, z0.d, z1.d[1]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1303,18 +1303,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
-# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, z0.d, z1.d[1]
-# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 fmla z0.d, z0.d, z1.d[1]
+# CHECK-NEXT: 2 11.6 0.1 0.0 <total>
# CHECK: [28] Code Region - Z bfdot
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.61
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1329,7 +1329,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D================eeeeER . . bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D==================eeeeER. . bfdot z0.s, z1.h, z2.h
-# CHECK-NEXT: [1,3] D======================eeeeER bfdot z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,3] .D=====================eeeeER bfdot z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1341,18 +1341,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 bfdot z0.s, z1.h, z2.h
-# CHECK-NEXT: 3. 2 16.5 0.0 0.0 bfdot z0.s, z0.h, z1.h
-# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 bfdot z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 11.6 0.1 0.0 <total>
# CHECK: [29] Code Region - Z bfmmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1603
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1367,7 +1367,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D================eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeeER . . bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D======================eeeeeER. . bfmmla z0.s, z1.h, z2.h
-# CHECK-NEXT: [1,3] D===========================eeeeeER bfmmla z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,3] .D==========================eeeeeER bfmmla z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1379,18 +1379,18 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfmmla z0.s, z1.h, z2.h
-# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfmmla z0.s, z0.h, z1.h
-# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 bfmmla z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 13.9 0.1 0.0 <total>
# CHECK: [30] Code Region - bfmlalb
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: uOps Per Cycle: 0.53
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 2.0
@@ -1405,7 +1405,7 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
-# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,3] .D========================eeeeeER bfmlalb z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1417,5 +1417,5 @@ bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
-# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb z0.s, z0.h, z1.h
-# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 bfmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 12.9 0.1 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
index 9097ec650f2c0ee..1be593dd04fb561 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s
@@ -10,7 +10,7 @@ test:
.cfi_startproc
abs D15, D11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
abs V25.2S, V25.2S // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
- abs Z26.B, P6/M, Z27.B // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ abs Z26.B, P6/M, Z27.B // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
adc W13, W6, W4 // ADC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
adc X8, X12, X10 // ADC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
adcs W29, W7, W30 // ADCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -27,8 +27,8 @@ test:
add WSP, WSP, #3547, LSL #12 // ADD <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
add X7, X30, #803 // ADD <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
add X7, X2, #319, LSL #12 // ADD <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
- add Z13.D, Z13.D, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- add Z16.D, Z16.D, #233, LSL #8 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add Z13.D, Z13.D, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ add Z16.D, Z16.D, #233, LSL #8 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
add W3, W2, W21, LSL #3 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
add W6, W21, W17, LSL #15 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
add W28, W30, W19, ASR #30 // ADD <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
@@ -37,8 +37,8 @@ test:
add X5, X20, X28, LSR #16 // ADD <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
add D0, D23, D21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
add V19.4S, V24.4S, V15.4S // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
- add Z29.D, P5/M, Z29.D, Z29.D // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- add Z10.H, Z22.H, Z13.H // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ add Z29.D, P5/M, Z29.D, Z29.D // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ add Z10.H, Z22.H, Z13.H // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
addhn V26.4H, V5.4S, V9.4S // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
addhn2 V1.16B, V19.8H, V6.8H // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
addp D1, V14.2D // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
@@ -71,27 +71,27 @@ test:
addv S22, V18.4S // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
addvl X1, X27, #-8 // ADDVL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
adr X3, test // ADR <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
- adr Z26.D, [Z1.D, Z8.D] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- adr Z22.S, [Z28.S, Z8.S, LSL #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- adr Z11.D, [Z2.D, Z29.D, SXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- adr Z3.D, [Z9.D, Z9.D, SXTW #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- adr Z6.D, [Z7.D, Z13.D, UXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- adr Z4.D, [Z24.D, Z22.D, UXTW #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ adr Z26.D, [Z1.D, Z8.D] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ adr Z22.S, [Z28.S, Z8.S, LSL #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ adr Z11.D, [Z2.D, Z29.D, SXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ adr Z3.D, [Z9.D, Z9.D, SXTW #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ adr Z6.D, [Z7.D, Z13.D, UXTW ] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ adr Z4.D, [Z24.D, Z22.D, UXTW #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
adrp X0, test // ADRP <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
and WSP, W16, #0xe00 // AND <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
and X2, X22, #0x1e00 // AND <Xd|SP>, <Xn>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
- and Z1.B, Z1.B, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- and Z7.H, Z7.H, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- and Z7.S, Z7.S, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- and Z7.D, Z7.D, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z1.B, Z1.B, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ and Z7.H, Z7.H, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ and Z7.S, Z7.S, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ and Z7.D, Z7.D, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
and P5.B, P1/Z, P6.B, P4.B // AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
and W11, W14, W24 // AND <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
and W2, W21, W22, LSR #25 // AND <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
and X1, X20, X29 // AND <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
and X8, X11, X22, ASR #56 // AND <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
and V29.8B, V26.8B, V26.8B // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
- and Z17.D, P6/M, Z17.D, Z12.D // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- and Z9.D, Z5.D, Z17.D // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ and Z17.D, P6/M, Z17.D, Z12.D // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ and Z9.D, Z5.D, Z17.D // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
ands W14, W8, #0x70 // ANDS <Wd>, <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
ands X4, X10, #0x60 // ANDS <Xd>, <Xn>, #<immd> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
ands W29, W28, W12 // ANDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -99,27 +99,27 @@ test:
ands X21, X9, X6 // ANDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
ands X10, X27, X7, ASR #20 // ANDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
ands P5.B, P1/Z, P2.B, P7.B // ANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
- andv H7, P6, Z31.H // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+ andv H7, P6, Z31.H // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 2 12 12 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
asr W30, W14, #5 // ASR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
asr X12, X21, #28 // ASR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
- asr Z7.B, P5/M, Z7.B, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z6.H, P6/M, Z6.H, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z28.S, P0/M, Z28.S, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z26.D, P5/M, Z26.D, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z10.B, Z14.B, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z23.H, Z18.H, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z29.S, Z11.S, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z20.D, Z26.D, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z7.B, P5/M, Z7.B, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z6.H, P6/M, Z6.H, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z28.S, P0/M, Z28.S, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z26.D, P5/M, Z26.D, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z10.B, Z14.B, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z23.H, Z18.H, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z29.S, Z11.S, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z20.D, Z26.D, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
asr W3, W0, W20 // ASR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
asr X7, X5, X21 // ASR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
- asr Z3.S, P0/M, Z3.S, Z10.S // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z9.S, P2/M, Z9.S, Z8.D // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asr Z26.S, Z21.S, Z21.D // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- asrd Z6.B, P4/M, Z6.B, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
- asrd Z19.H, P3/M, Z19.H, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
- asrd Z16.S, P3/M, Z16.S, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
- asrd Z9.D, P6/M, Z9.D, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
- asrr Z0.B, P0/M, Z0.B, Z19.B // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ asr Z3.S, P0/M, Z3.S, Z10.S // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z9.S, P2/M, Z9.S, Z8.D // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asr Z26.S, Z21.S, Z21.D // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asrd Z6.B, P4/M, Z6.B, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asrd Z19.H, P3/M, Z19.H, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asrd Z16.S, P3/M, Z16.S, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asrd Z9.D, P6/M, Z9.D, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ asrr Z0.B, P0/M, Z0.B, Z19.B // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
asrv W24, W28, W13 // ASRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
asrv X3, X21, X24 // ASRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
at s12e1r, X28 // AT <at_op>, <Xt> \\ No description \\ No scheduling info
@@ -153,34 +153,34 @@ test:
b.al test // B.al <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
b.nv test // B.nv <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
bfcvt H6, S20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
- bfcvt Z16.H, P6/M, Z1.S // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+ bfcvt Z16.H, P6/M, Z1.S // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
bfcvtn V12.4H, V15.4S // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
bfcvtn2 V15.8H, V13.4S // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
- bfcvtnt Z11.H, P7/M, Z24.S // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
+ bfcvtnt Z11.H, P7/M, Z24.S // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
bfdot V0.2S, V24.4H, V14.2H[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
- bfdot Z24.S, Z26.H, Z2.H[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+ bfdot Z24.S, Z26.H, Z2.H[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bfdot V31.4S, V21.8H, V14.8H // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
- bfdot Z15.S, Z3.H, Z7.H // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+ bfdot Z15.S, Z3.H, Z7.H // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bfi W10, W26, #31, #1 // BFI <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
bfi X25, X7, #8, #1 // BFI <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
bfm W30, W26, #14, #12 // BFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
bfm X15, X20, #0, #35 // BFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
- bfmlalb Z13.S, Z30.H, Z0.H[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
- bfmlalb Z3.S, Z14.H, Z13.H // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalb Z13.S, Z30.H, Z0.H[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ bfmlalb Z3.S, Z14.H, Z13.H // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bfmlalb V22.4S, V11.8H, V11.H[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
bfmlalt V17.4S, V4.8H, V11.H[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
bfmlalb V13.4S, V5.8H, V17.8H // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
bfmlalt V10.4S, V16.8H, V1.8H // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
- bfmlalt Z23.S, Z3.H, Z2.H[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
- bfmlalt Z25.S, Z21.H, Z22.H // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
+ bfmlalt Z23.S, Z3.H, Z2.H[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ bfmlalt Z25.S, Z21.H, Z22.H // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bfmmla V15.4S, V28.8H, V23.8H // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
- bfmmla Z26.S, Z2.H, Z12.H // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 1 5 3 2.0 V1UnitV01
+ bfmmla Z26.S, Z2.H, Z12.H // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 2 5 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bfxil W27, W23, #14, #14 // BFXIL <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
bfxil X0, X5, #11, #22 // BFXIL <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
- bic Z28.B, Z28.B, #0x70 // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- bic Z18.H, Z18.H, #0x60 // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- bic Z12.S, Z12.S, #0x2 // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- bic Z6.D, Z6.D, #0x4 // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z28.B, Z28.B, #0x70 // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ bic Z18.H, Z18.H, #0x60 // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ bic Z12.S, Z12.S, #0x2 // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ bic Z6.D, Z6.D, #0x4 // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bic P4.B, P4/Z, P6.B, P0.B // BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
bic W0, W26, W22 // BIC <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
bic W23, W10, W7, LSL #11 // BIC <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
@@ -191,8 +191,8 @@ test:
bic V24.2S, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
bic V31.2S, #192, LSL #0 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
bic V25.16B, V10.16B, V9.16B // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
- bic Z15.D, P4/M, Z15.D, Z25.D // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- bic Z7.D, Z8.D, Z28.D // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ bic Z15.D, P4/M, Z15.D, Z25.D // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ bic Z7.D, Z8.D, Z28.D // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
bics W24, W1, W25 // BICS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
bics W21, W0, W24, LSL #11 // BICS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
bics X27, X25, X10 // BICS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -231,22 +231,22 @@ test:
cinc X2, X1, NFRST // CINC <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
cinv W9, W12, TCONT // CINV <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
cinv X9, X30, FIRST // CINV <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
- clasta B11, P4, B11, Z21.B // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
- clasta W8, P0, W8, Z6.B // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
- clasta Z25.S, P1, Z25.S, Z14.S // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
- clastb D6, P7, D6, Z31.D // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
- clastb W28, P6, W28, Z12.B // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
- clastb Z27.H, P6, Z27.H, Z22.H // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+ clasta B11, P4, B11, Z21.B // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ clasta W8, P0, W8, Z6.B // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 3 9 9 1.0 V1UnitM0,V1UnitSVE1[2],V1UnitSVE1[2]
+ clasta Z25.S, P1, Z25.S, Z14.S // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ clastb D6, P7, D6, Z31.D // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ clastb W28, P6, W28, Z12.B // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 3 9 9 1.0 V1UnitM0,V1UnitSVE1[2],V1UnitSVE1[2]
+ clastb Z27.H, P6, Z27.H, Z22.H // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
clrex // CLREX \\ No description \\ No scheduling info
clrex #12 // CLREX #<imm> \\ No description \\ No scheduling info
cls V5.8B, V22.8B // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
cls W25, W0 // CLS <Wd>, <Wn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
cls X22, X6 // CLS <Xd>, <Xn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
- cls Z28.D, P3/M, Z2.D // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ cls Z28.D, P3/M, Z2.D // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
clz V24.8H, V30.8H // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
clz W26, W27 // CLZ <Wd>, <Wn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
clz X4, X0 // CLZ <Xd>, <Xn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
- clz Z3.S, P3/M, Z18.S // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ clz Z3.S, P3/M, Z18.S // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
cmeq D26, D5, D25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
cmeq V9.8H, V16.8H, V24.8H // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
cmeq D7, D26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
@@ -305,43 +305,43 @@ test:
cmp X27, X10, LSL #1 // CMP <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
cmp X18, X12, LSL #14 // CMP <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
cmp X6, X7, LSR #0 // CMP <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
- cmpeq P2.H, P0/Z, Z26.H, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpge P1.B, P4/Z, Z28.B, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpgt P1.B, P0/Z, Z13.B, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmphi P1.D, P3/Z, Z23.D, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmphs P7.D, P5/Z, Z23.D, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmple P5.B, P2/Z, Z9.B, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmplo P3.S, P5/Z, Z18.S, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpls P6.D, P6/Z, Z31.D, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmplt P0.H, P6/Z, Z29.H, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpne P5.S, P4/Z, Z18.S, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpeq P6.S, P5/Z, Z2.S, Z9.S // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpge P7.S, P4/Z, Z15.S, Z15.S // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpgt P2.H, P4/Z, Z26.H, Z11.H // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmphi P0.S, P4/Z, Z8.S, Z4.S // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmphs P1.D, P6/Z, Z26.D, Z15.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpne P4.B, P3/Z, Z21.B, Z16.B // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpeq P2.D, P3/Z, Z13.D, Z18.D // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpge P2.B, P3/Z, Z3.B, Z16.D // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpgt P2.H, P2/Z, Z28.H, Z30.D // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmphi P0.H, P5/Z, Z30.H, Z16.D // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmphs P7.H, P2/Z, Z1.H, Z26.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmple P7.B, P7/Z, Z3.B, Z13.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmplo P6.D, P2/Z, Z16.D, Z16.D // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpls P3.H, P2/Z, Z12.H, Z26.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmplt P0.D, P4/Z, Z29.D, Z26.D // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpne P0.S, P4/Z, Z30.S, Z8.D // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmple P1.D, P3/Z, Z2.D, Z26.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmplo P7.B, P0/Z, Z4.B, Z25.B // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmpls P4.D, P4/Z, Z2.D, Z14.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
- cmplt P2.S, P2/Z, Z31.S, Z21.S // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
+ cmpeq P2.H, P0/Z, Z26.H, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpge P1.B, P4/Z, Z28.B, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpgt P1.B, P0/Z, Z13.B, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmphi P1.D, P3/Z, Z23.D, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmphs P7.D, P5/Z, Z23.D, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmple P5.B, P2/Z, Z9.B, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmplo P3.S, P5/Z, Z18.S, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpls P6.D, P6/Z, Z31.D, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmplt P0.H, P6/Z, Z29.H, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpne P5.S, P4/Z, Z18.S, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpeq P6.S, P5/Z, Z2.S, Z9.S // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpge P7.S, P4/Z, Z15.S, Z15.S // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpgt P2.H, P4/Z, Z26.H, Z11.H // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmphi P0.S, P4/Z, Z8.S, Z4.S // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmphs P1.D, P6/Z, Z26.D, Z15.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpne P4.B, P3/Z, Z21.B, Z16.B // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpeq P2.D, P3/Z, Z13.D, Z18.D // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpge P2.B, P3/Z, Z3.B, Z16.D // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpgt P2.H, P2/Z, Z28.H, Z30.D // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmphi P0.H, P5/Z, Z30.H, Z16.D // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmphs P7.H, P2/Z, Z1.H, Z26.D // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmple P7.B, P7/Z, Z3.B, Z13.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmplo P6.D, P2/Z, Z16.D, Z16.D // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpls P3.H, P2/Z, Z12.H, Z26.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmplt P0.D, P4/Z, Z29.D, Z26.D // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpne P0.S, P4/Z, Z30.S, Z8.D // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmple P1.D, P3/Z, Z2.D, Z26.D // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmplo P7.B, P0/Z, Z4.B, Z25.B // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmpls P4.D, P4/Z, Z2.D, Z14.D // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+ cmplt P2.S, P2/Z, Z31.S, Z21.S // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
cmtst D10, D6, D5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
cmtst V13.2D, V13.2D, V13.2D // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
cneg W3, W17, HI // CNEG <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
cneg X26, X8, LAST // CNEG <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
- cnot Z7.S, P7/M, Z8.S // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ cnot Z7.S, P7/M, Z8.S // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
cnt V12.16B, V14.16B // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
- cnt Z26.H, P0/M, Z27.H // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ cnt Z26.H, P0/M, Z27.H // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
cntb X18 // CNTB <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
cntb X9, VL128 // CNTB <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
cntb X28, VL8, MUL #13 // CNTB <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -355,16 +355,16 @@ test:
cntw X23, VL3 // CNTW <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
cntw X6, VL16, MUL #11 // CNTW <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
cntp X22, P1, P2.S // CNTP <Xd>, <Pg>, <Pn>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- compact Z17.S, P1, Z18.S // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
- cpy Z13.B, P0/M, B6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z3.B, P6/M, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z11.S, P5/M, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z0.H, P0/M, #-11, LSL #0 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z5.B, P1/Z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z12.H, P1/Z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z25.D, P3/Z, #-81, LSL #8 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- cpy Z24.H, P0/M, W19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
- cpy Z23.S, P2/M, WSP // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ compact Z17.S, P1, Z18.S // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ cpy Z13.B, P0/M, B6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z3.B, P6/M, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z11.S, P5/M, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z0.H, P0/M, #-11, LSL #0 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z5.B, P1/Z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z12.H, P1/Z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z25.D, P3/Z, #-81, LSL #8 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z24.H, P0/M, W19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
+ cpy Z23.S, P2/M, WSP // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
crc32b W27, W12, W15 // CRC32B <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
crc32h W3, W15, W21 // CRC32H <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
crc32w W9, W18, W24 // CRC32W <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
@@ -407,17 +407,17 @@ test:
decw X27 // DECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
decw X18, VL32 // DECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
decw X29, VL6, MUL #3 // DECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- decd Z19.D // DECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- decd Z22.D, MUL3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- decd Z1.D, VL128, MUL #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- dech Z23.H // DECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- dech Z29.H, VL5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- dech Z28.H, VL64, MUL #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- decw Z8.S // DECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- decw Z4.S, VL64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- decw Z27.S, VL4, MUL #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ decd Z19.D // DECD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ decd Z22.D, MUL3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ decd Z1.D, VL128, MUL #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dech Z23.H // DECH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dech Z29.H, VL5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dech Z28.H, VL64, MUL #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ decw Z8.S // DECW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ decw Z4.S, VL64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ decw Z27.S, VL4, MUL #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
decp X6, P6.B // DECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- decp Z22.H, P1 // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ decp Z22.H, P1 // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
dmb SY // DMB <option> \\ No description \\ No scheduling info
dmb #6 // DMB #<imm> \\ No description \\ No scheduling info
drps // DRPS \\ No description \\ No scheduling info
@@ -430,43 +430,43 @@ test:
dup V24.4S, V9.S[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
dup V20.2D, V3.D[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
dup V19.4S, W27 // DUP <Vd>.<T>, <R><n> \\ ASIMD duplicate, gen reg \\ 1 3 3 1.0 V1UnitM0
- dup Z30.B, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- dup Z15.H, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- dup Z22.D, #-14, LSL #0 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- dup Z2.B, Z26.B[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- dup Z23.H, Z22.H[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- dup Z29.S, Z30.S[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- dup Z4.D, Z7.D[0] // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ dup Z30.B, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dup Z15.H, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dup Z22.D, #-14, LSL #0 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dup Z2.B, Z26.B[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dup Z23.H, Z22.H[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dup Z29.S, Z30.S[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dup Z4.D, Z7.D[0] // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
dup Z25.D, X28 // DUP <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
dup Z18.S, WSP // DUP <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
- dupm Z18.B, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
- dupm Z12.H, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
- dupm Z16.S, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
- dupm Z16.D, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ dupm Z18.B, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dupm Z12.H, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dupm Z16.S, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ dupm Z16.D, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
eon W29, W4, W19 // EON <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eon W14, W24, W28, ASR #14 // EON <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eon X19, X12, X2 // EON <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eon X23, X23, X23, ASR #41 // EON <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
- eon Z7.B, Z7.B, #0x70 // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eon Z3.H, Z3.H, #0x60 // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eon Z2.S, Z2.S, #0x2 // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eon Z24.D, Z24.D, #0x4 // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eon Z7.B, Z7.B, #0x70 // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eon Z3.H, Z3.H, #0x60 // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eon Z2.S, Z2.S, #0x2 // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eon Z24.D, Z24.D, #0x4 // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
eor WSP, W4, #0xe00 // EOR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
eor X27, X25, #0x1e00 // EOR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
- eor Z19.B, Z19.B, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eor Z18.H, Z18.H, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eor Z10.S, Z10.S, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eor Z29.D, Z29.D, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z19.B, Z19.B, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eor Z18.H, Z18.H, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eor Z10.S, Z10.S, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eor Z29.D, Z29.D, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
eor P6.B, P7/Z, P3.B, P5.B // EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
eor W8, W27, W2 // EOR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eor W8, W7, W29, ASR #30 // EOR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eor X22, X16, X6 // EOR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eor X0, X23, X30, LSL #11 // EOR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
eor V8.16B, V10.16B, V19.16B // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
- eor Z8.H, P3/M, Z8.H, Z14.H // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- eor Z30.D, Z26.D, Z20.D // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ eor Z8.H, P3/M, Z8.H, Z14.H // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ eor Z30.D, Z26.D, Z20.D // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
eors P1.B, P0/Z, P3.B, P1.B // EORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
- eorv H17, P1, Z15.H // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+ eorv H17, P1, Z15.H // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 2 12 12 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
eret // ERET \\ No description \\ No scheduling info
esb // ESB \\ No description \\ No scheduling info
ext V12.8B, V22.8B, V31.8B, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
@@ -479,15 +479,15 @@ test:
fabd S16, S29, S6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
fabd V13.8H, V28.8H, V12.8H // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
fabd V12.4S, V4.4S, V31.4S // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
- fabd Z11.H, P6/M, Z11.H, Z5.H // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
+ fabd Z11.H, P6/M, Z11.H, Z5.H // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fabs H25, H7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
fabs S17, S12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
fabs D30, D8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
fabs V16.4S, V31.4S // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
fabs V17.2S, V28.2S // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
- fabs Z26.S, P7/M, Z24.S // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
- facge P0.H, P5/Z, Z15.H, Z18.H // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- facgt P7.S, P7/Z, Z10.S, Z4.S // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fabs Z26.S, P7/M, Z24.S // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ facge P0.H, P5/Z, Z15.H, Z18.H // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ facgt P7.S, P7/Z, Z10.S, Z4.S // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
facge H24, H26, H29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
facge D25, D24, D7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
facge V25.4H, V16.4H, V11.4H // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
@@ -496,44 +496,44 @@ test:
facgt S29, S3, S2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
facgt V22.8H, V14.8H, V31.8H // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
facgt V22.4S, V8.4S, V2.4S // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
- facle P7.H, P5/Z, Z22.H, Z27.H // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- faclt P5.H, P5/Z, Z31.H, Z16.H // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fadd Z4.H, P7/M, Z4.H, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ facle P7.H, P5/Z, Z22.H, Z27.H // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ faclt P5.H, P5/Z, Z31.H, Z16.H // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fadd Z4.H, P7/M, Z4.H, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fadd H23, H27, H22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fadd S1, S23, S27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fadd D16, D15, D21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fadd V7.2D, V30.2D, V20.2D // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fadd V16.2D, V13.2D, V11.2D // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
- fadd Z26.H, P4/M, Z26.H, Z1.H // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- fadd Z23.S, Z7.S, Z16.S // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- fadda H8, P3, H8, Z28.H // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 1 19 19 0.06 V1UnitV0[18]
- fadda S11, P6, S11, Z1.S // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 1 11 11 0.1 V1UnitV0[10]
- fadda D27, P4, D27, Z27.D // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 1 8 8 0.67 V1UnitV01[3]
+ fadd Z26.H, P4/M, Z26.H, Z1.H // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fadd Z23.S, Z7.S, Z16.S // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fadda H8, P3, H8, Z28.H // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 2 19 19 0.06 V1UnitSVE0[36],V1UnitSVE0[36]
+ fadda S11, P6, S11, Z1.S // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 2 11 11 0.1 V1UnitSVE0[20],V1UnitSVE0[20]
+ fadda D27, P4, D27, Z27.D // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 2 8 8 0.67 V1UnitSVE01[6],V1UnitSVE01[6]
faddp H10, V19.2H // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
faddp D11, V28.2D // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
faddp V16.2D, V11.2D, V5.2D // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
faddp V16.4S, V11.4S, V18.4S // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
- faddv H21, P2, Z3.H // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
- faddv S16, P2, Z25.S // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
- faddv D18, P4, Z7.D // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
- fcadd Z29.H, P2/M, Z29.H, Z15.H, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 1 3 3 2.0 V1UnitV01
+ faddv H21, P2, Z3.H // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+ faddv S16, P2, Z25.S // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+ faddv D18, P4, Z7.D // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+ fcadd Z29.H, P2/M, Z29.H, Z15.H, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fccmp H31, H3, #11, HS // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fccmp S5, S6, #0, CC // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fccmp D17, D15, #0, ANY // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fccmpe H6, H1, #12, ANY // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fccmpe S16, S13, #10, VS // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fccmpe D17, D14, #15, PLAST // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
- fcmeq P7.D, P1/Z, Z23.D, Z21.D // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmge P6.H, P1/Z, Z19.H, Z10.H // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmgt P5.S, P2/Z, Z29.S, Z5.S // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmne P5.D, P0/Z, Z22.D, Z15.D // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmuo P0.D, P2/Z, Z15.D, Z23.D // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmeq P4.D, P5/Z, Z19.D, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmge P0.D, P5/Z, Z10.D, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmgt P6.D, P1/Z, Z8.D, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmle P2.D, P4/Z, Z26.D, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmlt P5.D, P5/Z, Z23.D, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
- fcmne P2.H, P3/Z, Z7.H, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmeq P7.D, P1/Z, Z23.D, Z21.D // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmge P6.H, P1/Z, Z19.H, Z10.H // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmgt P5.S, P2/Z, Z29.S, Z5.S // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmne P5.D, P0/Z, Z22.D, Z15.D // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmuo P0.D, P2/Z, Z15.D, Z23.D // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmeq P4.D, P5/Z, Z19.D, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmge P0.D, P5/Z, Z10.D, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmgt P6.D, P1/Z, Z8.D, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmle P2.D, P4/Z, Z26.D, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmlt P5.D, P5/Z, Z23.D, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcmne P2.H, P3/Z, Z7.H, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
fcmeq H30, H6, H1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmeq S17, S0, S21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmeq V19.2S, V31.2S, V19.2S // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
@@ -558,15 +558,15 @@ test:
fcmgt D30, D23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmgt V0.8H, V11.8H, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmgt V19.2D, V31.2D, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
- fcmla Z20.H, Z12.H, Z4.H[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
- fcmla Z1.S, Z27.S, Z6.S[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
- fcmla Z25.S, P3/M, Z13.S, Z23.S, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
- fcmle P5.S, P3/Z, Z28.S, Z12.S // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmla Z20.H, Z12.H, Z4.H[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fcmla Z1.S, Z27.S, Z6.S[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fcmla Z25.S, P3/M, Z13.S, Z23.S, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fcmle P5.S, P3/Z, Z28.S, Z12.S // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
fcmle H18, H28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmle D18, D16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmle V16.8H, V11.8H, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmle V22.4S, V30.4S, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
- fcmlt P1.S, P1/Z, Z13.S, Z24.S // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
+ fcmlt P1.S, P1/Z, Z13.S, Z24.S // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
fcmlt H23, H7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmlt D22, D28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
fcmlt V8.4H, V2.4H, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
@@ -583,7 +583,7 @@ test:
fcmpe S15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fcmpe D27, D22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
fcmpe D9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
- fcpy Z2.H, P7/M, #0.5 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fcpy Z2.H, P7/M, #0.5 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fcsel H26, H2, H11, NLAST // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
fcsel S5, S1, S4, VC // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
fcsel D14, D0, D19, NONE // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
@@ -593,12 +593,12 @@ test:
fcvt D9, S23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
fcvt H17, D16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
fcvt S31, D27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
- fcvt Z0.S, P1/M, Z4.H // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
- fcvt Z6.D, P0/M, Z17.H // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
- fcvt Z7.H, P7/M, Z5.S // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
- fcvt Z11.D, P2/M, Z18.S // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
- fcvt Z26.H, P0/M, Z30.D // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
- fcvt Z13.S, P2/M, Z3.D // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
+ fcvt Z0.S, P1/M, Z4.H // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ fcvt Z6.D, P0/M, Z17.H // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcvt Z7.H, P7/M, Z5.S // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ fcvt Z11.D, P2/M, Z18.S // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcvt Z26.H, P0/M, Z30.D // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcvt Z13.S, P2/M, Z3.D // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
fcvtas W23, H3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
fcvtas X14, H29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
fcvtas W0, S13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
@@ -750,13 +750,13 @@ test:
fcvtzs V27.2S, V28.2S // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
fcvtzs V29.4S, V18.4S // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
fcvtzs V13.2D, V31.2D // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
- fcvtzs Z1.H, P2/M, Z6.H // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
- fcvtzs Z19.S, P4/M, Z16.H // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
- fcvtzs Z14.D, P0/M, Z6.H // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
- fcvtzs Z25.S, P5/M, Z23.S // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
- fcvtzs Z3.D, P1/M, Z31.S // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
- fcvtzs Z28.S, P5/M, Z23.D // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
- fcvtzs Z22.D, P6/M, Z29.D // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzs Z1.H, P2/M, Z6.H // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ fcvtzs Z19.S, P4/M, Z16.H // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ fcvtzs Z14.D, P0/M, Z6.H // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ fcvtzs Z25.S, P5/M, Z23.S // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ fcvtzs Z3.D, P1/M, Z31.S // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ fcvtzs Z28.S, P5/M, Z23.D // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcvtzs Z22.D, P6/M, Z29.D // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
fcvtzu W12, H19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
fcvtzu X17, H23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
fcvtzu W16, S3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
@@ -785,13 +785,13 @@ test:
fcvtzu V25.2S, V25.2S // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
fcvtzu V21.4S, V2.4S // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
fcvtzu V23.2D, V15.2D // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
- fcvtzu Z15.H, P0/M, Z8.H // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
- fcvtzu Z8.S, P5/M, Z18.H // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
- fcvtzu Z11.D, P4/M, Z24.H // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
- fcvtzu Z13.S, P7/M, Z8.S // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
- fcvtzu Z20.D, P2/M, Z13.S // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
- fcvtzu Z31.S, P3/M, Z20.D // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
- fcvtzu Z4.D, P1/M, Z25.D // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
+ fcvtzu Z15.H, P0/M, Z8.H // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ fcvtzu Z8.S, P5/M, Z18.H // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ fcvtzu Z11.D, P4/M, Z24.H // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ fcvtzu Z13.S, P7/M, Z8.S // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ fcvtzu Z20.D, P2/M, Z13.S // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ fcvtzu Z31.S, P3/M, Z20.D // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ fcvtzu Z4.D, P1/M, Z25.D // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
fdiv H1, H26, H23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 1.0 V1UnitV02[2]
fdiv S31, S18, S12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
fdiv D6, D3, D0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
@@ -800,32 +800,32 @@ test:
fdiv V15.2S, V23.2S, V2.2S // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
fdiv V7.4S, V27.4S, V22.4S // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
fdiv V31.2D, V25.2D, V8.2D // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.14 V1UnitV02[14]
- fdiv Z21.H, P7/M, Z21.H, Z15.H // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
- fdiv Z17.S, P4/M, Z17.S, Z20.S // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
- fdiv Z13.D, P3/M, Z13.D, Z28.D // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
- fdivr Z29.H, P4/M, Z29.H, Z1.H // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
- fdivr Z13.S, P0/M, Z13.S, Z29.S // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
- fdivr Z14.D, P3/M, Z14.D, Z31.D // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
- fdup Z19.S, #0.5 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
- fexpa Z6.H, Z3.H // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
- fmad Z9.S, P5/M, Z9.S, Z7.S // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fdiv Z21.H, P7/M, Z21.H, Z15.H // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 2 13 13 0.08 V1UnitSVE0[24],V1UnitSVE0[24]
+ fdiv Z17.S, P4/M, Z17.S, Z20.S // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 2 10 10 0.11 V1UnitSVE0[18],V1UnitSVE0[18]
+ fdiv Z13.D, P3/M, Z13.D, Z28.D // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 2 15 15 0.07 V1UnitSVE0[28],V1UnitSVE0[28]
+ fdivr Z29.H, P4/M, Z29.H, Z1.H // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 2 13 13 0.08 V1UnitSVE0[24],V1UnitSVE0[24]
+ fdivr Z13.S, P0/M, Z13.S, Z29.S // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 2 10 10 0.11 V1UnitSVE0[18],V1UnitSVE0[18]
+ fdivr Z14.D, P3/M, Z14.D, Z31.D // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 2 15 15 0.07 V1UnitSVE0[28],V1UnitSVE0[28]
+ fdup Z19.S, #0.5 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fexpa Z6.H, Z3.H // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmad Z9.S, P5/M, Z9.S, Z7.S // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmadd H27, H0, H6, H28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmadd S13, S24, S15, S5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmadd D19, D4, D2, D17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
- fmax Z25.D, P2/M, Z25.D, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmax Z25.D, P2/M, Z25.D, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmax H8, H7, H11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmax S9, S21, S2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmax D4, D26, D26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmax V0.4S, V13.4S, V21.4S // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmax V12.4S, V27.4S, V11.4S // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
- fmax Z16.S, P5/M, Z16.S, Z12.S // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
- fmaxnm Z25.D, P5/M, Z25.D, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmax Z16.S, P5/M, Z16.S, Z12.S // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmaxnm Z25.D, P5/M, Z25.D, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmaxnm H29, H13, H14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmaxnm S25, S20, S0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmaxnm D29, D25, D16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmaxnm V6.4S, V3.4S, V3.4S // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmaxnm V9.2D, V15.2D, V11.2D // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
- fmaxnm Z6.S, P5/M, Z6.S, Z17.S // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxnm Z6.S, P5/M, Z6.S, Z17.S // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmaxnmp H25, V19.2H // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fmaxnmp D17, V29.2D // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fmaxnmp V31.4S, V4.4S, V2.4S // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
@@ -833,9 +833,9 @@ test:
fmaxnmv H0, V13.4H // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
fmaxnmv H12, V11.8H // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
fmaxnmv S28, V31.4S // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
- fmaxnmv H9, P3, Z2.H // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
- fmaxnmv S26, P6, Z0.S // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
- fmaxnmv D7, P1, Z29.D // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fmaxnmv H9, P3, Z2.H // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+ fmaxnmv S26, P6, Z0.S // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+ fmaxnmv D7, P1, Z29.D // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
fmaxp H15, V25.2H // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fmaxp S6, V2.2S // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fmaxp V21.2S, V17.2S, V13.2S // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
@@ -843,23 +843,23 @@ test:
fmaxv H23, V4.4H // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
fmaxv H25, V15.8H // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
fmaxv S23, V2.4S // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
- fmaxv H12, P0, Z22.H // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
- fmaxv S24, P5, Z12.S // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
- fmaxv D1, P6, Z25.D // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
- fmin Z24.D, P4/M, Z24.D, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmaxv H12, P0, Z22.H // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+ fmaxv S24, P5, Z12.S // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+ fmaxv D1, P6, Z25.D // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+ fmin Z24.D, P4/M, Z24.D, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmin H4, H13, H17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmin S1, S14, S22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmin D18, D19, D22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmin V6.4S, V25.4S, V27.4S // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fmin V12.2S, V30.2S, V25.2S // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
- fmin Z11.H, P3/M, Z11.H, Z16.H // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
- fminnm Z19.H, P4/M, Z19.H, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fmin Z11.H, P3/M, Z11.H, Z16.H // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fminnm Z19.H, P4/M, Z19.H, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fminnm H29, H23, H17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fminnm S24, S14, S30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fminnm D0, D26, D8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fminnm V16.2S, V23.2S, V27.2S // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
fminnm V23.4S, V19.4S, V22.4S // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
- fminnm Z24.S, P3/M, Z24.S, Z13.S // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
+ fminnm Z24.S, P3/M, Z24.S, Z13.S // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fminnmp H20, V14.2H // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fminnmp D15, V8.2D // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fminnmp V27.2D, V27.2D, V16.2D // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
@@ -867,9 +867,9 @@ test:
fminnmv H19, V25.4H // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
fminnmv H23, V17.8H // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
fminnmv S29, V17.4S // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
- fminnmv H24, P3, Z1.H // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
- fminnmv S30, P3, Z9.S // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
- fminnmv D18, P5, Z8.D // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fminnmv H24, P3, Z1.H // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+ fminnmv S30, P3, Z9.S // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+ fminnmv D18, P5, Z8.D // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
fminp H7, V10.2H // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fminp S17, V7.2S // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
fminp V25.4S, V2.4S, V15.4S // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
@@ -877,9 +877,9 @@ test:
fminv H3, V30.4H // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
fminv H29, V12.8H // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
fminv S16, V19.4S // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
- fminv H15, P2, Z25.H // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
- fminv S4, P0, Z6.S // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
- fminv D20, P1, Z5.D // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
+ fminv H15, P2, Z25.H // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+ fminv S4, P0, Z6.S // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+ fminv D20, P1, Z5.D // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
fmla H23, H24, V15.H[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmla S9, S20, V28.S[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmla D12, D20, V7.D[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
@@ -887,12 +887,12 @@ test:
fmla V2.2S, V16.2S, V28.S[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmla V14.4S, V14.4S, V5.S[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmla V10.2D, V14.2D, V21.D[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
- fmla Z2.H, Z4.H, Z7.H[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
- fmla Z22.S, Z15.S, Z1.S[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
- fmla Z1.D, Z30.D, Z11.D[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla Z2.H, Z4.H, Z7.H[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmla Z22.S, Z15.S, Z1.S[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmla Z1.D, Z30.D, Z11.D[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmla V1.4S, V24.4S, V12.4S // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmla V30.2D, V16.2D, V6.2D // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
- fmla Z6.S, P1/M, Z24.S, Z24.S // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmla Z6.S, P1/M, Z24.S, Z24.S // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmls H8, H14, V7.H[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmls S20, S17, V5.S[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmls D11, D24, V29.D[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
@@ -900,12 +900,12 @@ test:
fmls V10.2S, V27.2S, V0.S[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmls V27.4S, V7.4S, V24.S[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmls V10.2D, V22.2D, V29.D[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
- fmls Z3.H, Z31.H, Z0.H[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
- fmls Z30.S, Z8.S, Z0.S[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
- fmls Z10.D, Z20.D, Z0.D[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls Z3.H, Z31.H, Z0.H[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmls Z30.S, Z8.S, Z0.S[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmls Z10.D, Z20.D, Z0.D[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmls V6.2S, V3.2S, V12.2S // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmls V6.8H, V15.8H, V23.8H // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
- fmls Z26.S, P5/M, Z28.S, Z26.S // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmls Z26.S, P5/M, Z28.S, Z26.S // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmov W15, H31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
fmov X21, H14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
fmov H6, W5 // FMOV <Hd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
@@ -916,8 +916,8 @@ test:
fmov V7.D[1], X8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
fmov X26, D29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
fmov X4, V26.D[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
- fmov Z2.S, P0/M, #0.5 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
- fmov Z14.S, #0.5 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
+ fmov Z2.S, P0/M, #0.5 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmov Z14.S, #0.5 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmov H18, H28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
fmov S13, S23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
fmov D27, D17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
@@ -927,9 +927,9 @@ test:
fmov V12.2S, #0.5 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
fmov V10.2S, #0.5 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
fmov V0.2D, #0.5 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
- fmov Z2.D, P2/M, #0.0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
- fmov Z5.S, #0.0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
- fmsb Z25.S, P5/M, Z25.S, Z29.S // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fmov Z2.D, P2/M, #0.0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmov Z5.S, #0.0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmsb Z25.S, P5/M, Z25.S, Z29.S // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmsub H25, H28, H12, H24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmsub S31, S0, S23, S24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fmsub D12, D10, D20, D16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
@@ -940,17 +940,17 @@ test:
fmul V5.2S, V12.2S, V9.S[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmul V15.4S, V30.4S, V2.S[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmul V11.2D, V31.2D, V24.D[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
- fmul Z17.H, P5/M, Z17.H, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
- fmul Z27.H, Z30.H, Z0.H[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
- fmul Z6.S, Z16.S, Z1.S[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
- fmul Z4.D, Z30.D, Z2.D[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z17.H, P5/M, Z17.H, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmul Z27.H, Z30.H, Z0.H[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmul Z6.S, Z16.S, Z1.S[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmul Z4.D, Z30.D, Z2.D[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmul H28, H14, H3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmul S28, S16, S24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmul D19, D19, D0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmul V0.2D, V14.2D, V20.2D // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmul V9.2D, V29.2D, V7.2D // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
- fmul Z22.D, P1/M, Z22.D, Z3.D // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
- fmul Z19.S, Z14.S, Z26.S // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmul Z22.D, P1/M, Z22.D, Z3.D // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fmul Z19.S, Z14.S, Z26.S // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmulx H18, H17, V7.H[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmulx S23, S3, V3.S[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmulx D3, D13, V30.D[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
@@ -962,20 +962,20 @@ test:
fmulx D18, D19, D22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmulx V22.2D, V18.2D, V4.2D // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
fmulx V16.2S, V4.2S, V27.2S // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
- fmulx Z7.H, P5/M, Z7.H, Z21.H // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ fmulx Z7.H, P5/M, Z7.H, Z21.H // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fneg H2, H9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
fneg S11, S19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
fneg D5, D16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
fneg V26.2D, V2.2D // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
fneg V14.2S, V24.2S // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
- fneg Z16.S, P0/M, Z25.S // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- fnmad Z6.H, P2/M, Z14.H, Z21.H // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fneg Z16.S, P0/M, Z25.S // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fnmad Z6.H, P2/M, Z14.H, Z21.H // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fnmadd H3, H18, H31, H24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fnmadd S8, S18, S2, S14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fnmadd D19, D29, D28, D30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
- fnmla Z15.D, P0/M, Z8.D, Z29.D // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
- fnmls Z13.D, P0/M, Z8.D, Z12.D // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
- fnmsb Z30.D, P7/M, Z8.D, Z9.D // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
+ fnmla Z15.D, P0/M, Z8.D, Z29.D // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fnmls Z13.D, P0/M, Z8.D, Z12.D // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fnmsb Z30.D, P7/M, Z8.D, Z9.D // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fnmsub H3, H29, H24, H17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fnmsub S29, S26, S17, S4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
fnmsub D7, D13, D13, D4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
@@ -990,38 +990,38 @@ test:
frecpe V25.2S, V28.2S // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
frecpe V21.4S, V18.4S // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
frecpe V10.2D, V26.2D // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
- frecpe Z14.H, Z0.H // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
- frecpe Z5.S, Z16.S // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
- frecpe Z27.D, Z11.D // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+ frecpe Z14.H, Z0.H // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frecpe Z5.S, Z16.S // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frecpe Z27.D, Z11.D // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
frecps H29, H19, H8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
frecps D25, D17, D12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
frecps V12.8H, V25.8H, V4.8H // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
frecps V7.2D, V29.2D, V18.2D // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
- frecps Z11.S, Z31.S, Z1.S // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
+ frecps Z11.S, Z31.S, Z1.S // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 2 4 4 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
frecpx H18, H11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
frecpx S13, S30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
- frecpx Z15.S, P4/M, Z12.S // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 1 3 3 1.0 V1UnitV0
- frintn Z30.H, P3/M, Z31.H // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frintn Z17.S, P4/M, Z23.S // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frintn Z28.D, P1/M, Z25.D // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
- frinta Z10.H, P6/M, Z17.H // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frinta Z7.S, P4/M, Z27.S // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frinta Z17.D, P4/M, Z17.D // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
- frintm Z26.H, P7/M, Z0.H // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frintm Z6.S, P0/M, Z28.S // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frintm Z29.D, P4/M, Z3.D // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
- frintp Z20.H, P4/M, Z12.H // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frintp Z3.S, P7/M, Z18.S // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frintp Z28.D, P7/M, Z4.D // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
- frintz Z27.H, P2/M, Z12.H // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frintz Z12.S, P6/M, Z3.S // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frintz Z12.D, P2/M, Z31.D // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
- frinti Z16.H, P4/M, Z9.H // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frinti Z18.S, P6/M, Z27.S // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frinti Z26.D, P2/M, Z12.D // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
- frintx Z17.H, P0/M, Z9.H // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
- frintx Z27.S, P7/M, Z16.S // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
- frintx Z21.D, P4/M, Z23.D // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
+ frecpx Z15.S, P4/M, Z12.S // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintn Z30.H, P3/M, Z31.H // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintn Z17.S, P4/M, Z23.S // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintn Z28.D, P1/M, Z25.D // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frinta Z10.H, P6/M, Z17.H // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frinta Z7.S, P4/M, Z27.S // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frinta Z17.D, P4/M, Z17.D // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintm Z26.H, P7/M, Z0.H // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintm Z6.S, P0/M, Z28.S // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintm Z29.D, P4/M, Z3.D // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintp Z20.H, P4/M, Z12.H // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintp Z3.S, P7/M, Z18.S // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintp Z28.D, P7/M, Z4.D // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintz Z27.H, P2/M, Z12.H // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintz Z12.S, P6/M, Z3.S // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintz Z12.D, P2/M, Z31.D // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frinti Z16.H, P4/M, Z9.H // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frinti Z18.S, P6/M, Z27.S // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frinti Z26.D, P2/M, Z12.D // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintx Z17.H, P0/M, Z9.H // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintx Z27.S, P7/M, Z16.S // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frintx Z21.D, P4/M, Z23.D // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
frinta H22, H10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
frinta S15, S7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
frinta D30, D10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
@@ -1086,15 +1086,15 @@ test:
frsqrte V6.2S, V8.2S // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
frsqrte V30.4S, V21.4S // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
frsqrte V15.2D, V14.2D // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
- frsqrte Z6.H, Z30.H // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
- frsqrte Z27.S, Z15.S // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
- frsqrte Z6.D, Z17.D // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
+ frsqrte Z6.H, Z30.H // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frsqrte Z27.S, Z15.S // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ frsqrte Z6.D, Z17.D // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
frsqrts H28, H26, H1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
frsqrts S28, S1, S11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
frsqrts V8.4H, V9.4H, V30.4H // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
frsqrts V20.4S, V26.4S, V27.4S // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
- frsqrts Z10.H, Z25.H, Z22.H // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
- fscale Z2.H, P0/M, Z2.H, Z21.H // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
+ frsqrts Z10.H, Z25.H, Z22.H // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 2 4 4 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fscale Z2.H, P0/M, Z2.H, Z21.H // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fsqrt H13, H24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 1.0 V1UnitV02[2]
fsqrt S20, S15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
fsqrt D25, D21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
@@ -1103,22 +1103,22 @@ test:
fsqrt V30.2S, V20.2S // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
fsqrt V2.4S, V24.4S // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
fsqrt V28.2D, V25.2D // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.13 V1UnitV02[15]
- fsqrt Z13.H, P3/M, Z11.H // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 1 13 13 0.08 V1UnitV0[12]
- fsqrt Z2.S, P7/M, Z0.S // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 1 10 10 0.11 V1UnitV0[9]
- fsqrt Z17.D, P6/M, Z17.D // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 1 16 16 0.07 V1UnitV0[14]
- fsub Z12.D, P6/M, Z12.D, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
+ fsqrt Z13.H, P3/M, Z11.H // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 2 13 13 0.08 V1UnitSVE0[24],V1UnitSVE0[24]
+ fsqrt Z2.S, P7/M, Z0.S // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 2 10 10 0.11 V1UnitSVE0[18],V1UnitSVE0[18]
+ fsqrt Z17.D, P6/M, Z17.D // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 2 16 16 0.07 V1UnitSVE0[28],V1UnitSVE0[28]
+ fsub Z12.D, P6/M, Z12.D, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fsub H20, H11, H18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fsub S15, S4, S24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fsub D25, D26, D4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fsub V13.8H, V15.8H, V17.8H // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
fsub V1.2S, V31.2S, V27.2S // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
- fsub Z24.S, P4/M, Z24.S, Z10.S // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- fsub Z19.H, Z8.H, Z29.H // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- fsubr Z22.H, P7/M, Z22.H, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- fsubr Z13.S, P2/M, Z13.S, Z4.S // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
- ftmad Z19.D, Z19.D, Z6.D, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
- ftsmul Z21.S, Z0.S, Z10.S // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
- ftssel Z5.D, Z0.D, Z15.D // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+ fsub Z24.S, P4/M, Z24.S, Z10.S // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fsub Z19.H, Z8.H, Z29.H // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fsubr Z22.H, P7/M, Z22.H, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ fsubr Z13.S, P2/M, Z13.S, Z4.S // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ ftmad Z19.D, Z19.D, Z6.D, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ ftsmul Z21.S, Z0.S, Z10.S // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ ftssel Z5.D, Z0.D, Z15.D // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
hint #9 // HINT #<imm> \\ No description \\ No scheduling info
hlt #31335 // HLT #<imm> \\ No description \\ No scheduling info
hvc #60601 // HVC #<imm> \\ No description \\ No scheduling info
@@ -1136,33 +1136,33 @@ test:
incw X29 // INCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
incw X2, VL64 // INCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
incw X2, VL8, MUL #1 // INCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- incd Z24.D // INCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- incd Z23.D, VL8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- incd Z20.D, VL2, MUL #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- inch Z29.H // INCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- inch Z28.H, VL16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- inch Z29.H, VL16, MUL #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- incw Z17.S // INCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- incw Z31.S, MUL3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- incw Z12.S, VL4, MUL #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ incd Z24.D // INCD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ incd Z23.D, VL8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ incd Z20.D, VL2, MUL #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ inch Z29.H // INCH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ inch Z28.H, VL16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ inch Z29.H, VL16, MUL #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ incw Z17.S // INCW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ incw Z31.S, MUL3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ incw Z12.S, VL4, MUL #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
incp X7, P0.H // INCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- incp Z2.D, P6 // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
- index Z8.B, #15, W14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z14.H, #11, W10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z17.S, #14, W21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z5.D, #11, X15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
- index Z16.B, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
- index Z13.H, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
- index Z20.S, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
- index Z13.D, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 1 5 5 0.5 V1UnitV0[2]
- index Z28.B, W27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z13.H, W28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z22.S, W7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z0.D, X25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
- index Z6.B, W24, W8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z20.H, W4, W7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z10.S, W2, W19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
- index Z2.D, X23, X7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
+ incp Z2.D, P6 // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
+ index Z8.B, #15, W14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z14.H, #11, W10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z17.S, #14, W21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z5.D, #11, X15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 3 8 8 0.5 V1UnitM0[2],V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z16.B, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z13.H, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z20.S, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z13.D, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ index Z28.B, W27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z13.H, W28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z22.S, W7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z0.D, X25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 3 8 8 0.5 V1UnitM0[2],V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z6.B, W24, W8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z20.H, W4, W7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z10.S, W2, W19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+ index Z2.D, X23, X7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 3 8 8 0.5 V1UnitM0[2],V1UnitSVE0[2],V1UnitSVE0[2]
ins V15.B[7], V6.B[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
ins V17.H[1], V3.H[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
ins V4.S[1], V7.S[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
@@ -1171,15 +1171,15 @@ test:
ins V25.H[2], W14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
ins V14.S[1], W29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
ins V19.D[1], X27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
- insr Z4.D, D0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
- insr Z4.D, X14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ insr Z4.D, D0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ insr Z4.D, X14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 3 6 6 1.0 V1UnitSVE1[2],V1UnitSVE1[2],V1UnitM0
isb // ISB \\ No description \\ No scheduling info
isb SY // ISB <option> \\ No description \\ No scheduling info
isb #1 // ISB #<imm> \\ No description \\ No scheduling info
- lasta B3, P1, Z3.B // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
- lasta W16, P0, Z10.B // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
- lastb D3, P1, Z17.D // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
- lastb X4, P3, Z31.D // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+ lasta B3, P1, Z3.B // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lasta W16, P0, Z10.B // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 3 6 6 1.0 V1UnitSVE1[2],V1UnitSVE1[2],V1UnitM0
+ lastb D3, P1, Z17.D // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lastb X4, P3, Z31.D // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 3 6 6 1.0 V1UnitSVE1[2],V1UnitSVE1[2],V1UnitM0
ld1 { V23.8B }, [X11] // LD1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
ld1 { V25.8B }, [X30], #8 // LD1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
ld1 { V14.8B }, [X1], X26 // LD1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
@@ -1478,15 +1478,15 @@ test:
ld2 { V29.S, V30.S }[1], [X12], X0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
ld2 { V1.D, V2.D }[1], [X3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
ld2 { V10.D, V11.D }[1], [X18], X27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
- ld2b { Z9.B, Z10.B }, P2/Z, [X22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2b { Z28.B, Z29.B }, P3/Z, [X22, #4, MUL VL] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2b { Z26.B, Z27.B }, P1/Z, [X3, X12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2d { Z12.D, Z13.D }, P5/Z, [X24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2d { Z22.D, Z23.D }, P2/Z, [X21, #-2, MUL VL] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2d { Z22.D, Z23.D }, P6/Z, [X14, X4, LSL #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2h { Z5.H, Z6.H }, P5/Z, [X20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2h { Z27.H, Z28.H }, P7/Z, [X11, #14, MUL VL] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2h { Z18.H, Z19.H }, P3/Z, [X9, X17, LSL #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 10 10 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitS[2]
+ ld2b { Z9.B, Z10.B }, P2/Z, [X22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2b { Z28.B, Z29.B }, P3/Z, [X22, #4, MUL VL] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2b { Z26.B, Z27.B }, P1/Z, [X3, X12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 9 9 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2d { Z12.D, Z13.D }, P5/Z, [X24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2d { Z22.D, Z23.D }, P2/Z, [X21, #-2, MUL VL] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2d { Z22.D, Z23.D }, P6/Z, [X14, X4, LSL #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 9 9 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2h { Z5.H, Z6.H }, P5/Z, [X20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2h { Z27.H, Z28.H }, P7/Z, [X11, #14, MUL VL] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2h { Z18.H, Z19.H }, P3/Z, [X9, X17, LSL #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 10 10 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2],V1UnitS[2]
ld2r { V10.8B, V11.8B }, [X20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
ld2r { V18.8B, V19.8B }, [X11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
ld2r { V28.8B, V29.8B }, [X30], X14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
@@ -1511,9 +1511,9 @@ test:
ld2r { V26.2D, V27.2D }, [X8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
ld2r { V14.2D, V15.2D }, [X3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
ld2r { V24.2D, V25.2D }, [X6], X14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
- ld2w { Z21.S, Z22.S }, P4/Z, [X12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2w { Z29.S, Z30.S }, P2/Z, [X19, #6, MUL VL] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
- ld2w { Z18.S, Z19.S }, P6/Z, [X22, X22, LSL #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
+ ld2w { Z21.S, Z22.S }, P4/Z, [X12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2w { Z29.S, Z30.S }, P2/Z, [X19, #6, MUL VL] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+ ld2w { Z18.S, Z19.S }, P6/Z, [X22, X22, LSL #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 9 9 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
ld3 { V8.8B, V9.8B, V10.8B }, [X0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
ld3 { V6.8B, V7.8B, V8.8B }, [X26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld3 { V20.8B, V21.8B, V22.8B }, [X25], X24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
@@ -1547,15 +1547,15 @@ test:
ld3 { V1.S, V2.S, V3.S }[0], [X26], X20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld3 { V14.D, V15.D, V16.D }[1], [X30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld3 { V23.D, V24.D, V25.D }[0], [X24], X14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
- ld3b { Z29.B, Z30.B, Z31.B }, P3/Z, [X17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3b { Z23.B, Z24.B, Z25.B }, P7/Z, [X12, #18, MUL VL] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3b { Z23.B, Z24.B, Z25.B }, P3/Z, [X12, X12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
- ld3d { Z20.D, Z21.D, Z22.D }, P2/Z, [X6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3d { Z1.D, Z2.D, Z3.D }, P2/Z, [X9, #-15, MUL VL] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3d { Z13.D, Z14.D, Z15.D }, P6/Z, [X27, X30, LSL #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
- ld3h { Z26.H, Z27.H, Z28.H }, P1/Z, [X29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3h { Z14.H, Z15.H, Z16.H }, P3/Z, [X18, #9, MUL VL] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3h { Z5.H, Z6.H, Z7.H }, P3/Z, [X6, X21, LSL #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+ ld3b { Z29.B, Z30.B, Z31.B }, P3/Z, [X17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3b { Z23.B, Z24.B, Z25.B }, P7/Z, [X12, #18, MUL VL] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3b { Z23.B, Z24.B, Z25.B }, P3/Z, [X12, X12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
+ ld3d { Z20.D, Z21.D, Z22.D }, P2/Z, [X6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3d { Z1.D, Z2.D, Z3.D }, P2/Z, [X9, #-15, MUL VL] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3d { Z13.D, Z14.D, Z15.D }, P6/Z, [X27, X30, LSL #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
+ ld3h { Z26.H, Z27.H, Z28.H }, P1/Z, [X29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3h { Z14.H, Z15.H, Z16.H }, P3/Z, [X18, #9, MUL VL] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3h { Z5.H, Z6.H, Z7.H }, P3/Z, [X6, X21, LSL #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
ld3r { V24.8B, V25.8B, V26.8B }, [X10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
ld3r { V14.8B, V15.8B, V16.8B }, [X11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld3r { V22.8B, V23.8B, V24.8B }, [X0], X11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
@@ -1580,9 +1580,9 @@ test:
ld3r { V8.2D, V9.2D, V10.2D }, [X3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
ld3r { V3.2D, V4.2D, V5.2D }, [X25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld3r { V8.2D, V9.2D, V10.2D }, [X18], X13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
- ld3w { Z23.S, Z24.S, Z25.S }, P1/Z, [X8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3w { Z6.S, Z7.S, Z8.S }, P4/Z, [X0, #18, MUL VL] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
- ld3w { Z27.S, Z28.S, Z29.S }, P3/Z, [X3, X6, LSL #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
+ ld3w { Z23.S, Z24.S, Z25.S }, P1/Z, [X8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3w { Z6.S, Z7.S, Z8.S }, P4/Z, [X0, #18, MUL VL] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+ ld3w { Z27.S, Z28.S, Z29.S }, P3/Z, [X3, X6, LSL #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
ld4 { V6.8B, V7.8B, V8.8B, V9.8B }, [X27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
ld4 { V20.8B, V21.8B, V22.8B, V23.8B }, [X10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld4 { V18.8B, V19.8B, V20.8B, V21.8B }, [X24], X11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
@@ -1616,15 +1616,15 @@ test:
ld4 { V20.S, V21.S, V22.S, V23.S }[1], [X27], X16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld4 { V18.D, V19.D, V20.D, V21.D }[1], [X26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld4 { V8.D, V9.D, V10.D, V11.D }[0], [X23], X0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
- ld4b { Z16.B, Z17.B, Z18.B, Z19.B }, P3/Z, [X23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P5/Z, [X3, #12, MUL VL] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P4/Z, [X20, X12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
- ld4d { Z26.D, Z27.D, Z28.D, Z29.D }, P7/Z, [X10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4d { Z27.D, Z28.D, Z29.D, Z30.D }, P0/Z, [X6, #24, MUL VL] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4d { Z7.D, Z8.D, Z9.D, Z10.D }, P4/Z, [X25, X8, LSL #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
- ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P4/Z, [X19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P1/Z, [X16, #-8, MUL VL] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4h { Z10.H, Z11.H, Z12.H, Z13.H }, P2/Z, [X8, X28, LSL #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+ ld4b { Z16.B, Z17.B, Z18.B, Z19.B }, P3/Z, [X23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P5/Z, [X3, #12, MUL VL] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4b { Z7.B, Z8.B, Z9.B, Z10.B }, P4/Z, [X20, X12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
+ ld4d { Z26.D, Z27.D, Z28.D, Z29.D }, P7/Z, [X10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4d { Z27.D, Z28.D, Z29.D, Z30.D }, P0/Z, [X6, #24, MUL VL] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4d { Z7.D, Z8.D, Z9.D, Z10.D }, P4/Z, [X25, X8, LSL #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
+ ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P4/Z, [X19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4h { Z4.H, Z5.H, Z6.H, Z7.H }, P1/Z, [X16, #-8, MUL VL] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4h { Z10.H, Z11.H, Z12.H, Z13.H }, P2/Z, [X8, X28, LSL #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
ld4r { V20.8B, V21.8B, V22.8B, V23.8B }, [X23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
ld4r { V24.8B, V25.8B, V26.8B, V27.8B }, [X15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld4r { V4.8B, V5.8B, V6.8B, V7.8B }, [X26], X6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
@@ -1649,9 +1649,9 @@ test:
ld4r { V7.2D, V8.2D, V9.2D, V10.2D }, [X8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
ld4r { V12.2D, V13.2D, V14.2D, V15.2D }, [X2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
ld4r { V17.2D, V18.2D, V19.2D, V20.2D }, [X21], X13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
- ld4w { Z18.S, Z19.S, Z20.S, Z21.S }, P6/Z, [X4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4w { Z21.S, Z22.S, Z23.S, Z24.S }, P5/Z, [X16, #-8, MUL VL] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
- ld4w { Z25.S, Z26.S, Z27.S, Z28.S }, P2/Z, [X23, X8, LSL #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+ ld4w { Z18.S, Z19.S, Z20.S, Z21.S }, P6/Z, [X4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4w { Z21.S, Z22.S, Z23.S, Z24.S }, P5/Z, [X16, #-8, MUL VL] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+ ld4w { Z25.S, Z26.S, Z27.S, Z28.S }, P2/Z, [X23, X8, LSL #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
ldapur W7, [X24] // LDAPUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
ldapur W25, [X29, #68] // LDAPUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
ldapur X20, [X13] // LDAPUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
@@ -2068,68 +2068,68 @@ test:
ldxrh W24, [X11, #0] // LDXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
lsl W25, W0, #22 // LSL <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
lsl X27, X7, #56 // LSL <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
- lsl Z1.B, P1/M, Z1.B, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z7.H, P3/M, Z7.H, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z10.S, P3/M, Z10.S, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z21.D, P7/M, Z21.D, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z13.B, Z4.B, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z11.H, Z16.H, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z16.S, Z11.S, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z18.D, Z4.D, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z1.B, P1/M, Z1.B, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z7.H, P3/M, Z7.H, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z10.S, P3/M, Z10.S, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z21.D, P7/M, Z21.D, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z13.B, Z4.B, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z11.H, Z16.H, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z16.S, Z11.S, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z18.D, Z4.D, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
lsl W4, W9, W12 // LSL <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
lsl X7, X29, X22 // LSL <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
- lsl Z3.D, P2/M, Z3.D, Z15.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z3.S, P6/M, Z3.S, Z8.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsl Z19.S, Z25.S, Z25.D // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lslr Z3.H, P5/M, Z3.H, Z23.H // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsl Z3.D, P2/M, Z3.D, Z15.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z3.S, P6/M, Z3.S, Z8.D // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsl Z19.S, Z25.S, Z25.D // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lslr Z3.H, P5/M, Z3.H, Z23.H // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
lslv W6, W8, W2 // LSLV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
lslv X7, X26, X21 // LSLV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
lsr W0, W0, #30 // LSR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
lsr X23, X24, #23 // LSR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
- lsr Z21.B, P5/M, Z21.B, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z1.H, P4/M, Z1.H, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z24.S, P7/M, Z24.S, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z13.D, P3/M, Z13.D, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z3.B, Z11.B, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z5.H, Z12.H, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z21.S, Z16.S, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z21.D, Z15.D, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z21.B, P5/M, Z21.B, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z1.H, P4/M, Z1.H, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z24.S, P7/M, Z24.S, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z13.D, P3/M, Z13.D, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z3.B, Z11.B, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z5.H, Z12.H, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z21.S, Z16.S, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z21.D, Z15.D, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
lsr W17, W20, W15 // LSR <Wd>, <Wn>, <Wm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
lsr X24, X4, X20 // LSR <Xd>, <Xn>, <Xm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
- lsr Z30.D, P3/M, Z30.D, Z28.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z18.H, P3/M, Z18.H, Z29.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsr Z7.H, Z30.H, Z11.D // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
- lsrr Z14.B, P1/M, Z14.B, Z16.B // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+ lsr Z30.D, P3/M, Z30.D, Z28.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z18.H, P3/M, Z18.H, Z29.D // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsr Z7.H, Z30.H, Z11.D // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ lsrr Z14.B, P1/M, Z14.B, Z16.B // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
lsrv W0, W28, W19 // LSRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
lsrv X16, X22, X19 // LSRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
- mad Z17.B, P7/M, Z4.B, Z5.B // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mad Z29.H, P4/M, Z31.H, Z18.H // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mad Z7.S, P4/M, Z5.S, Z29.S // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mad Z28.D, P7/M, Z18.D, Z2.D // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mad Z17.B, P7/M, Z4.B, Z5.B // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mad Z29.H, P4/M, Z31.H, Z18.H // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mad Z7.S, P4/M, Z5.S, Z29.S // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mad Z28.D, P7/M, Z18.D, Z2.D // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
madd W15, W9, W9, W29 // MADD <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
madd X29, X22, X21, X21 // MADD <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
mla V15.8H, V22.8H, V4.H[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
mla V28.2S, V10.2S, V2.S[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
mla V31.4S, V18.4S, V27.4S // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
- mla Z1.B, P0/M, Z3.B, Z3.B // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mla Z21.H, P2/M, Z31.H, Z30.H // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mla Z24.S, P3/M, Z11.S, Z9.S // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mla Z2.D, P0/M, Z12.D, Z5.D // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mla Z1.B, P0/M, Z3.B, Z3.B // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mla Z21.H, P2/M, Z31.H, Z30.H // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mla Z24.S, P3/M, Z11.S, Z9.S // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mla Z2.D, P0/M, Z12.D, Z5.D // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
mls V25.8H, V29.8H, V0.H[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
mls V22.2S, V29.2S, V0.S[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
mls V26.4S, V5.4S, V28.4S // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
- mls Z11.B, P1/M, Z28.B, Z6.B // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mls Z31.H, P0/M, Z25.H, Z24.H // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mls Z1.S, P5/M, Z7.S, Z13.S // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- mls Z2.D, P1/M, Z17.D, Z10.D // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ mls Z11.B, P1/M, Z28.B, Z6.B // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mls Z31.H, P0/M, Z25.H, Z24.H // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mls Z1.S, P5/M, Z7.S, Z13.S // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mls Z2.D, P1/M, Z17.D, Z10.D // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
mneg W14, W30, W30 // MNEG <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
mneg X21, X3, X9 // MNEG <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
- mov Z9.S, P2/M, S10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z17.B, Z29.B[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- mov Z26.H, Z7.H[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- mov Z14.S, Z21.S[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- mov Z22.D, Z14.D[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- mov Z21.S, S25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z9.S, P2/M, S10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z17.B, Z29.B[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z26.H, Z7.H[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z14.S, Z21.S[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z22.D, Z14.D[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z21.S, S25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
mov WSP, #0xe00 // MOV <Wd|WSP>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
mov X3, #0x1e00 // MOV <Xd|SP>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
mov V30.B[12], V17.B[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
@@ -2140,15 +2140,15 @@ test:
mov V27.H[6], W6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
mov V21.S[0], W21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
mov V13.D[0], X10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
- mov Z30.B, P7/M, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z30.D, P7/M, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z10.H, P5/M, #72, LSL #0 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z19.B, P6/Z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z6.D, P1/Z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z12.D, P7/Z, #40, LSL #8 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
- mov Z30.B, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- mov Z2.H, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
- mov Z20.H, #82, LSL #8 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+ mov Z30.B, P7/M, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z30.D, P7/M, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z10.H, P5/M, #72, LSL #0 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z19.B, P6/Z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z6.D, P1/Z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z12.D, P7/Z, #40, LSL #8 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z30.B, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z2.H, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z20.H, #82, LSL #8 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
mov W24, #0xe00 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
mov X15, #0xe00 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
mov P0.B, P0/M, P6.B // MOV <Pd>.B, <Pg>/M, <Pn>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
@@ -2159,8 +2159,8 @@ test:
mov H13, V17.H[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
mov S7, V11.S[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
mov D27, V24.D[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
- mov Z12.D, P5/M, X24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
- mov Z31.D, P6/M, SP // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+ mov Z12.D, P5/M, X24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z31.D, P6/M, SP // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
mov Z19.B, W27 // MOV <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
mov Z17.H, WSP // MOV <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
mov W13, V12.S[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
@@ -2168,14 +2168,14 @@ test:
mov WSP, WSP // MOV <Wd|WSP>, <Wn|WSP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
mov X1, X11 // MOV <Xd|SP>, <Xn|SP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
mov V12.16B, V6.16B // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
- mov Z1.D, P3/M, Z6.D // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
- mov Z24.D, Z25.D // MOV <Zd>.D, <Zn>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ mov Z1.D, P3/M, Z6.D // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z24.D, Z25.D // MOV <Zd>.D, <Zn>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
mov W30, #0xe00 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
mov X4, #0xe00 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
- mov Z14.B, #0x70 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
- mov Z8.H, #0x60 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
- mov Z2.S, #0x2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
- mov Z6.D, #0x4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+ mov Z14.B, #0x70 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z8.H, #0x60 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z2.S, #0x2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ mov Z6.D, #0x4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
mov P2.B, P5.B // MOV <Pd>.B, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
movi V7.16B, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
movi V14.8H, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
@@ -2193,9 +2193,9 @@ test:
movn W27, #47742, LSL #0 // MOVN <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
movn X10, #63431 // MOVN <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
movn X0, #58015, LSL #48 // MOVN <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
- movprfx Z22.B, P0/M, Z4.B // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+ movprfx Z22.B, P0/M, Z4.B // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
mla Z22.B, P0/M, Z19.B, Z25.B // Ignore
- movprfx Z3, Z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
+ movprfx Z3, Z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
fmla Z3.D, P0/M, Z8.D, Z19.D // Ignore
movs P0.B, P7/Z, P3.B // MOVS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
movs P4.B, P0.B // MOVS <Pd>.B, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
@@ -2205,10 +2205,10 @@ test:
movz X11, #20464, LSL #48 // MOVZ <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
mrs X4, ACTLR_EL1 // MRS <Xt>, <systemreg> \\ No description \\ No scheduling info
mrs X14, S2_4_C0_C5_4 // MRS <Xt>, S<op0>_<op1>_<Cn>_<Cm>_<op2> \\ No description \\ No scheduling info
- msb Z18.B, P1/M, Z27.B, Z0.B // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- msb Z27.H, P5/M, Z23.H, Z1.H // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- msb Z26.S, P2/M, Z0.S, Z2.S // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
- msb Z1.D, P6/M, Z12.D, Z12.D // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+ msb Z18.B, P1/M, Z27.B, Z0.B // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ msb Z27.H, P5/M, Z23.H, Z1.H // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ msb Z26.S, P2/M, Z0.S, Z2.S // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ msb Z1.D, P6/M, Z12.D, Z12.D // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
msr DAIFSet, #0 // MSR <pstatefield1>, #<imm1> \\ No description \\ No scheduling info
msr SPSel, #0 // MSR <pstatefield2>, #<imm2> \\ No description \\ No scheduling info
msr ACTLR_EL3, X18 // MSR <systemreg>, <Xt> \\ No description \\ No scheduling info
@@ -2219,15 +2219,15 @@ test:
mul V5.8H, V21.8H, V3.H[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
mul V29.2S, V10.2S, V3.S[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
mul V30.4S, V11.4S, V4.S[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
- mul Z16.B, Z16.B, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- mul Z9.H, Z9.H, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- mul Z23.S, Z23.S, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- mul Z15.D, Z15.D, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ mul Z16.B, Z16.B, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mul Z9.H, Z9.H, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mul Z23.S, Z23.S, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mul Z15.D, Z15.D, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
mul V3.8H, V9.8H, V8.8H // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
- mul Z17.B, P6/M, Z17.B, Z9.B // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- mul Z18.H, P7/M, Z18.H, Z15.H // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- mul Z29.S, P6/M, Z29.S, Z8.S // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- mul Z25.D, P1/M, Z25.D, Z25.D // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ mul Z17.B, P6/M, Z17.B, Z9.B // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mul Z18.H, P7/M, Z18.H, Z15.H // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mul Z29.S, P6/M, Z29.S, Z8.S // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ mul Z25.D, P1/M, Z25.D, Z25.D // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
mul W8, W13, W20 // MUL <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
mul X12, X8, X25 // MUL <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
mvn W0, W18 // MVN <Wd>, <Wm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
@@ -2250,7 +2250,7 @@ test:
neg X0, X16, LSR #2 // NEG <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
neg D18, D20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
neg V16.2D, V14.2D // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
- neg Z16.B, P2/M, Z15.B // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ neg Z16.B, P2/M, Z15.B // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
negs W30, W22, LSL #2 // NEGS <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
negs W8, W8, LSL #15 // NEGS <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
negs W12, W21, ASR #15 // NEGS <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
@@ -2265,13 +2265,13 @@ test:
nor P4.B, P4/Z, P0.B, P4.B // NOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
nors P1.B, P0/Z, P7.B, P6.B // NORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
not P7.B, P2/Z, P6.B // NOT <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
- not Z29.S, P4/M, Z9.S // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ not Z29.S, P4/M, Z9.S // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
not V15.8B, V29.8B // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
nots P7.B, P3/Z, P1.B // NOTS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
- orn Z5.B, Z5.B, #0x70 // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orn Z14.H, Z14.H, #0x60 // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orn Z14.S, Z14.S, #0x2 // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orn Z27.D, Z27.D, #0x4 // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orn Z5.B, Z5.B, #0x70 // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orn Z14.H, Z14.H, #0x60 // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orn Z14.S, Z14.S, #0x2 // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orn Z27.D, Z27.D, #0x4 // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
orn P1.B, P2/Z, P3.B, P5.B // ORN <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
orn W2, W27, W7 // ORN <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
orn W6, W28, W14, LSL #19 // ORN <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
@@ -2281,10 +2281,10 @@ test:
orns P3.B, P3/Z, P0.B, P3.B // ORNS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
orr WSP, W27, #0xe00 // ORR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
orr X27, X6, #0x1e00 // ORR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
- orr Z4.B, Z4.B, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orr Z26.H, Z26.H, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orr Z3.S, Z3.S, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orr Z30.D, Z30.D, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z4.B, Z4.B, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orr Z26.H, Z26.H, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orr Z3.S, Z3.S, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orr Z30.D, Z30.D, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
orr P6.B, P4/Z, P4.B, P3.B // ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
orr W14, W1, W23 // ORR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
orr W25, W22, W0, ASR #20 // ORR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
@@ -2295,10 +2295,10 @@ test:
orr V4.4S, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
orr V17.4S, #119, LSL #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
orr V12.16B, V9.16B, V1.16B // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
- orr Z28.H, P3/M, Z28.H, Z7.H // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
- orr Z8.D, Z14.D, Z19.D // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+ orr Z28.H, P3/M, Z28.H, Z7.H // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ orr Z8.D, Z14.D, Z19.D // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
orrs P7.B, P7/Z, P6.B, P5.B // ORRS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
- orv D19, P6, Z31.D // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+ orv D19, P6, Z31.D // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 2 12 12 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
pfalse P6.B // PFALSE <Pd>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
pfirst P0.B, P5, P0.B // PFIRST <Pdn>.B, <Pg>, <Pdn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
pmul V30.8B, V0.8B, V27.8B // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
@@ -2408,7 +2408,7 @@ test:
rbit V16.16B, V21.16B // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
rbit W27, W10 // RBIT <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
rbit X30, X0 // RBIT <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
- rbit Z23.S, P3/M, Z10.S // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+ rbit Z23.S, P3/M, Z10.S // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
rdffr P2.B, P1/Z // RDFFR <Pd>.B, <Pg>/Z \\ Read first fault register, predicated \\ 1 3 3 0.5 V1UnitM0[2]
rdffr P5.B // RDFFR <Pd>.B \\ Read first fault register, unpredicated \\ 1 2 2 1.0 V1UnitM0
rdffrs P7.B, P2/Z // RDFFRS <Pd>.B, <Pg>/Z \\ Read first fault register and set flags \\ 1 4 4 0.33 V1UnitM[6]
@@ -2416,7 +2416,7 @@ test:
ret // RET \\ Branch, register \\ 1 1 1 2.0 V1UnitB
ret X14 // RET {<Xn>} \\ Branch, register \\ 1 1 1 2.0 V1UnitB
rev P1.H, P2.H // REV <Pd>.<T>, <Pn>.<T> \\ Predicate reverse \\ 1 2 2 1.0 V1UnitM0
- rev Z11.D, Z24.D // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ rev Z11.D, Z24.D // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
rev W19, W20 // REV <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
rev X30, X15 // REV <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
rev16 V5.16B, V26.16B // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
@@ -2426,9 +2426,9 @@ test:
rev32 X30, X6 // REV32 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
rev64 X5, X2 // REV64 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
rev64 V0.2S, V19.2S // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
- revb Z3.D, P2/M, Z21.D // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
- revh Z1.D, P5/M, Z19.D // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
- revw Z16.D, P1/M, Z3.D // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+ revb Z3.D, P2/M, Z21.D // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ revh Z1.D, P5/M, Z19.D // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ revw Z16.D, P1/M, Z3.D // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
ror W20, W13, #21 // ROR <Wd>, <Ws>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
ror X5, X8, #7 // ROR <Xd>, <Xs>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
ror W29, W26, W0 // ROR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
@@ -2447,7 +2447,7 @@ test:
sabal V2.2D, V5.2S, V31.2S // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
sabal2 V21.2D, V15.4S, V13.4S // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
sabd V12.2S, V11.2S, V27.2S // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
- sabd Z14.S, P1/M, Z14.S, Z23.S // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sabd Z14.S, P1/M, Z14.S, Z23.S // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sabdl V28.2D, V4.2S, V19.2S // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
sabdl2 V10.8H, V30.16B, V4.16B // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
sadalp V3.4H, V5.8B // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
@@ -2459,9 +2459,9 @@ test:
saddlv S24, V29.4H // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
saddlv S22, V23.8H // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
saddlv D2, V27.4S // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
- saddv D19, P6, Z1.B // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
- saddv D7, P2, Z14.H // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
- saddv D4, P7, Z27.S // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
+ saddv D19, P6, Z1.B // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ saddv D7, P2, Z14.H // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ saddv D4, P7, Z27.S // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
saddw V8.4S, V0.4S, V1.4H // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
saddw2 V24.8H, V10.8H, V30.16B // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
sbc W0, W16, W1 // SBC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
@@ -2502,27 +2502,27 @@ test:
scvtf V9.2S, V31.2S // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
scvtf V2.4S, V7.4S // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
scvtf V18.2D, V11.2D // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
- scvtf Z3.H, P3/M, Z29.H // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
- scvtf Z1.H, P5/M, Z27.S // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
- scvtf Z30.S, P4/M, Z29.S // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
- scvtf Z18.D, P3/M, Z16.S // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
- scvtf Z18.H, P1/M, Z14.D // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
- scvtf Z10.S, P1/M, Z11.D // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
- scvtf Z3.D, P2/M, Z27.D // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ scvtf Z3.H, P3/M, Z29.H // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ scvtf Z1.H, P5/M, Z27.S // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ scvtf Z30.S, P4/M, Z29.S // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ scvtf Z18.D, P3/M, Z16.S // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ scvtf Z18.H, P1/M, Z14.D // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ scvtf Z10.S, P1/M, Z11.D // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ scvtf Z3.D, P2/M, Z27.D // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
sdiv W6, W28, W24 // SDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
sdiv X19, X2, X14 // SDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
- sdiv Z24.S, P1/M, Z24.S, Z14.S // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
- sdiv Z7.D, P6/M, Z7.D, Z20.D // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
- sdivr Z10.S, P2/M, Z10.S, Z7.S // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
- sdivr Z0.D, P3/M, Z0.D, Z9.D // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
- sdot Z6.S, Z29.B, Z0.B[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
- sdot Z0.D, Z18.H, Z10.H[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
- sdot Z28.S, Z30.B, Z14.B // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
- sdot Z19.D, Z5.H, Z8.H // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ sdiv Z24.S, P1/M, Z24.S, Z14.S // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+ sdiv Z7.D, P6/M, Z7.D, Z20.D // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+ sdivr Z10.S, P2/M, Z10.S, Z7.S // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+ sdivr Z0.D, P3/M, Z0.D, Z9.D // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+ sdot Z6.S, Z29.B, Z0.B[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sdot Z0.D, Z18.H, Z10.H[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ sdot Z28.S, Z30.B, Z14.B // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sdot Z19.D, Z5.H, Z8.H // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
sdot V2.4S, V27.16B, V5.4B[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
sdot V3.2S, V20.8B, V10.8B // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
sel P1.B, P7, P5.B, P4.B // SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
- sel Z0.H, P7, Z13.H, Z13.H // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+ sel Z0.H, P7, Z13.H, Z13.H // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
setffr // SETFFR \\ Set first fault register \\ 1 2 2 1.0 V1UnitM0
sev // SEV \\ No description \\ No scheduling info
sevl // SEVL \\ No description \\ No scheduling info
@@ -2551,8 +2551,8 @@ test:
sli V29.2S, V14.2S, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
sli V25.2D, V21.2D, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
smaddl X17, W27, W30, X3 // SMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
- smax Z3.S, Z3.S, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- smax Z0.B, P5/M, Z0.B, Z20.B // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smax Z3.S, Z3.S, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ smax Z0.B, P5/M, Z0.B, Z20.B // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
smax V30.16B, V3.16B, V30.16B // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
smaxp V21.8H, V16.8H, V7.8H // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
smaxv B4, V30.8B // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
@@ -2560,13 +2560,13 @@ test:
smaxv H28, V14.4H // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
smaxv H6, V19.8H // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
smaxv S3, V14.4S // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
- smaxv B19, P4, Z14.B // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
- smaxv H0, P6, Z20.H // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
- smaxv S11, P2, Z28.S // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
- smaxv D24, P5, Z24.D // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ smaxv B19, P4, Z14.B // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ smaxv H0, P6, Z20.H // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ smaxv S11, P2, Z28.S // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ smaxv D24, P5, Z24.D // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
smc #32343 // SMC #<imm> \\ No description \\ No scheduling info
- smin Z21.S, Z21.S, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- smin Z22.S, P0/M, Z22.S, Z30.S // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ smin Z21.S, Z21.S, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ smin Z22.S, P0/M, Z22.S, Z30.S // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
smin V29.4S, V24.4S, V24.4S // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
sminp V7.8H, V27.8H, V7.8H // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
sminv B6, V11.8B // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
@@ -2574,10 +2574,10 @@ test:
sminv H24, V23.4H // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
sminv H2, V9.8H // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
sminv S16, V15.4S // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
- sminv B4, P2, Z10.B // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
- sminv H15, P7, Z10.H // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
- sminv S29, P0, Z27.S // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
- sminv D17, P2, Z18.D // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ sminv B4, P2, Z10.B // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ sminv H15, P7, Z10.H // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ sminv S29, P0, Z27.S // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ sminv D17, P2, Z18.D // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
smlal V16.4S, V9.4H, V11.H[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
smlal V0.2D, V25.2S, V1.S[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
smlal2 V1.4S, V9.8H, V0.H[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
@@ -2603,10 +2603,10 @@ test:
smov X15, V3.S[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
smov X5, V29.S[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
smsubl X8, W24, W13, X6 // SMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
- smulh Z11.B, P5/M, Z11.B, Z17.B // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- smulh Z8.H, P0/M, Z8.H, Z4.H // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- smulh Z27.S, P7/M, Z27.S, Z30.S // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- smulh Z4.D, P7/M, Z4.D, Z28.D // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ smulh Z11.B, P5/M, Z11.B, Z17.B // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ smulh Z8.H, P0/M, Z8.H, Z4.H // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ smulh Z27.S, P7/M, Z27.S, Z30.S // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ smulh Z4.D, P7/M, Z4.D, Z28.D // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
smulh X8, X29, X17 // SMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
smull X19, W0, W6 // SMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
smull V3.4S, V26.4H, V1.H[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
@@ -2617,10 +2617,10 @@ test:
smull2 V7.2D, V14.4S, V15.4S // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
sqabs D15, D26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqabs V25.8H, V24.8H // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
- sqadd Z1.B, Z1.B, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sqadd Z18.H, Z18.H, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sqadd Z3.D, Z3.D, #158, LSL #0 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sqadd Z19.D, Z27.D, Z28.D // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqadd Z1.B, Z1.B, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqadd Z18.H, Z18.H, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqadd Z3.D, Z3.D, #158, LSL #0 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqadd Z19.D, Z27.D, Z28.D // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqadd H12, H18, H10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqadd V15.2S, V13.2S, V28.2S // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqdecb X26, W26 // SQDECB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -2635,30 +2635,30 @@ test:
sqdecd X18 // SQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecd X11, VL5 // SQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecd X21, ALL, MUL #13 // SQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- sqdecd Z27.D // SQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqdecd Z2.D, VL128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqdecd Z23.D, VL1, MUL #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecd Z27.D // SQDECD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqdecd Z2.D, VL128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqdecd Z23.D, VL1, MUL #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqdech X7, W7 // SQDECH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdech X10, W10, VL128 // SQDECH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdech X16, W16, VL6, MUL #11 // SQDECH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdech X6 // SQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdech X17, VL128 // SQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdech X27, VL128, MUL #4 // SQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- sqdech Z16.H // SQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqdech Z21.H, VL6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqdech Z7.H, MUL3, MUL #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdech Z16.H // SQDECH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqdech Z21.H, VL6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqdech Z7.H, MUL3, MUL #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqdecp X1, P4.B, W1 // SQDECP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
sqdecp X26, P6.D // SQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- sqdecp Z10.D, P3 // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ sqdecp Z10.D, P3 // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
sqdecw X13, W13 // SQDECW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecw X2, W2, POW2 // SQDECW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecw X26, W26, VL8, MUL #10 // SQDECW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecw X10 // SQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecw X17, VL128 // SQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqdecw X13, MUL4, MUL #3 // SQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- sqdecw Z7.S // SQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqdecw Z10.S, POW2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqdecw Z28.S, VL2, MUL #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqdecw Z7.S // SQDECW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqdecw Z10.S, POW2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqdecw Z28.S, VL2, MUL #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqdmlal S23, H16, V4.H[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
sqdmlal D12, S18, V3.S[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
sqdmlal V20.4S, V30.4H, V12.H[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
@@ -2704,30 +2704,30 @@ test:
sqincd X10 // SQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincd X17, VL5 // SQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincd X13, VL64, MUL #1 // SQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- sqincd Z24.D // SQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqincd Z10.D, VL128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqincd Z29.D, VL128, MUL #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincd Z24.D // SQINCD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqincd Z10.D, VL128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqincd Z29.D, VL128, MUL #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqinch X28, W28 // SQINCH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqinch X30, W30, VL1 // SQINCH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqinch X16, W16, VL4, MUL #2 // SQINCH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqinch X23 // SQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqinch X10, VL64 // SQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqinch X16, POW2, MUL #2 // SQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- sqinch Z3.H // SQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqinch Z23.H, VL4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqinch Z6.H, VL128, MUL #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqinch Z3.H // SQINCH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqinch Z23.H, VL4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqinch Z6.H, VL128, MUL #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqincp X13, P2.H, W13 // SQINCP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
sqincp X0, P7.H // SQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- sqincp Z9.H, P1 // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ sqincp Z9.H, P1 // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
sqincw X24, W24 // SQINCW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincw X16, W16, MUL4 // SQINCW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincw X27, W27, VL32, MUL #15 // SQINCW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincw X29 // SQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincw X25, VL7 // SQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
sqincw X21, VL8, MUL #3 // SQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- sqincw Z30.S // SQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqincw Z8.S, MUL3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- sqincw Z0.S, VL5, MUL #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ sqincw Z30.S // SQINCW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqincw Z8.S, MUL3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqincw Z0.S, VL5, MUL #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqneg D24, D22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqneg V30.16B, V15.16B // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqrdmlah H14, H4, V6.H[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
@@ -2806,10 +2806,10 @@ test:
sqshrun2 V10.16B, V28.8H, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
sqshrun2 V4.8H, V28.4S, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
sqshrun2 V7.4S, V18.2D, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
- sqsub Z13.B, Z13.B, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sqsub Z28.H, Z28.H, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sqsub Z11.S, Z11.S, #14, LSL #0 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sqsub Z28.S, Z9.S, Z12.S // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sqsub Z13.B, Z13.B, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqsub Z28.H, Z28.H, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqsub Z11.S, Z11.S, #14, LSL #0 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sqsub Z28.S, Z9.S, Z12.S // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sqsub B3, B13, B12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqsub V20.8H, V18.8H, V12.8H // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
sqxtn B11, H22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
@@ -3361,9 +3361,9 @@ test:
sub WSP, WSP, #84, LSL #12 // SUB <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
sub X18, X22, #36 // SUB <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
sub X17, X20, #184, LSL #0 // SUB <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
- sub Z18.B, Z18.B, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sub Z22.S, Z22.S, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sub Z15.H, Z15.H, #196, LSL #8 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z18.B, Z18.B, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sub Z22.S, Z22.S, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sub Z15.H, Z15.H, #196, LSL #8 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
sub W0, W21, W2, LSL #4 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
sub W22, W7, W13, LSL #19 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
sub W1, W18, W16, ASR #4 // SUB <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
@@ -3372,14 +3372,14 @@ test:
sub X24, X19, X13, LSR #20 // SUB <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
sub D18, D25, D0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
sub V15.2S, V14.2S, V11.2S // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
- sub Z18.H, P4/M, Z18.H, Z7.H // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- sub Z29.B, Z19.B, Z8.B // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ sub Z18.H, P4/M, Z18.H, Z7.H // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sub Z29.B, Z19.B, Z8.B // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
subhn V7.4H, V10.4S, V13.4S // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
subhn2 V24.4S, V24.2D, V8.2D // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
- subr Z13.B, Z13.B, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- subr Z17.S, Z17.S, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- subr Z15.D, Z15.D, #100, LSL #0 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- subr Z21.D, P7/M, Z21.D, Z24.D // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ subr Z13.B, Z13.B, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ subr Z17.S, Z17.S, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ subr Z15.D, Z15.D, #100, LSL #0 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ subr Z21.D, P7/M, Z21.D, Z24.D // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
subs W25, WSP, W13 // SUBS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
subs W10, WSP, W9, UXTH // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
subs W20, WSP, W3, SXTH #2 // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -3402,16 +3402,16 @@ test:
subs X26, X14, X30, LSR #35 // SUBS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
sudot V4.2S, V20.8B, V18.4B[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
sudot Z5.S, Z30.B, Z3.B[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
- sunpkhi Z22.D, Z16.S // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
- sunpklo Z10.H, Z0.B // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ sunpkhi Z22.D, Z16.S // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ sunpklo Z10.H, Z0.B // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
suqadd B15, B21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
suqadd V26.16B, V27.16B // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
svc #35275 // SVC #<imm> \\ No description \\ No scheduling info
sxtb W7, W20 // SXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
sxtb X18, W14 // SXTB <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
- sxtb Z16.H, P5/M, Z15.H // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
- sxth Z4.S, P7/M, Z11.S // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
- sxtw Z12.D, P1/M, Z16.D // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ sxtb Z16.H, P5/M, Z15.H // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ sxth Z4.S, P7/M, Z11.S // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ sxtw Z12.D, P1/M, Z16.D // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
sxth W23, W2 // SXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
sxth X22, W17 // SXTH <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
sxtl V4.8H, V21.8B // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
@@ -3448,7 +3448,7 @@ test:
uabal V13.2D, V16.2S, V11.2S // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
uabal2 V17.4S, V0.8H, V1.8H // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
uabd V23.4S, V4.4S, V30.4S // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
- uabd Z5.B, P5/M, Z5.B, Z10.B // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uabd Z5.B, P5/M, Z5.B, Z10.B // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uabdl V13.4S, V26.4H, V7.4H // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
uabdl2 V15.2D, V9.4S, V10.4S // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
uadalp V31.1D, V14.2S // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
@@ -3460,10 +3460,10 @@ test:
uaddlv S12, V24.4H // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
uaddlv S30, V0.8H // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
uaddlv D6, V19.4S // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
- uaddv D9, P5, Z1.B // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
- uaddv D26, P0, Z25.H // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
- uaddv D4, P1, Z1.S // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
- uaddv D28, P6, Z6.D // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ uaddv D9, P5, Z1.B // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ uaddv D26, P0, Z25.H // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ uaddv D4, P1, Z1.S // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ uaddv D28, P6, Z6.D // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
uaddw V17.2D, V9.2D, V12.2S // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
uaddw2 V15.4S, V13.4S, V4.8H // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
ubfiz W11, W6, #30, #1 // UBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
@@ -3500,31 +3500,31 @@ test:
ucvtf V14.2S, V2.2S // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
ucvtf V20.4S, V0.4S // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
ucvtf V27.2D, V3.2D // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
- ucvtf Z31.H, P5/M, Z30.H // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
- ucvtf Z23.H, P7/M, Z9.S // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
- ucvtf Z1.S, P1/M, Z10.S // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
- ucvtf Z24.D, P5/M, Z9.S // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
- ucvtf Z30.H, P2/M, Z24.D // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
- ucvtf Z9.S, P5/M, Z9.D // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
- ucvtf Z18.D, P6/M, Z19.D // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+ ucvtf Z31.H, P5/M, Z30.H // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+ ucvtf Z23.H, P7/M, Z9.S // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ ucvtf Z1.S, P1/M, Z10.S // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ ucvtf Z24.D, P5/M, Z9.S // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+ ucvtf Z30.H, P2/M, Z24.D // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ ucvtf Z9.S, P5/M, Z9.D // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ ucvtf Z18.D, P6/M, Z19.D // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
udiv W12, W17, W22 // UDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
udiv X7, X2, X23 // UDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
- udiv Z30.S, P5/M, Z30.S, Z10.S // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
- udiv Z31.D, P5/M, Z31.D, Z29.D // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
- udivr Z19.S, P4/M, Z19.S, Z8.S // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
- udivr Z3.D, P5/M, Z3.D, Z8.D // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
- udot Z0.S, Z5.B, Z4.B[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
- udot Z19.D, Z1.H, Z13.H[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
- udot Z22.S, Z29.B, Z4.B // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
- udot Z9.D, Z1.H, Z11.H // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
+ udiv Z30.S, P5/M, Z30.S, Z10.S // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+ udiv Z31.D, P5/M, Z31.D, Z29.D // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+ udivr Z19.S, P4/M, Z19.S, Z8.S // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+ udivr Z3.D, P5/M, Z3.D, Z8.D // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+ udot Z0.S, Z5.B, Z4.B[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ udot Z19.D, Z1.H, Z13.H[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ udot Z22.S, Z29.B, Z4.B // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ udot Z9.D, Z1.H, Z11.H // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
udot V10.2S, V11.8B, V21.4B[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
udot V7.4S, V21.16B, V6.4B[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
udot V19.2S, V31.8B, V17.8B // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
uhadd V10.8H, V7.8H, V7.8H // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
uhsub V12.4H, V16.4H, V28.4H // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
umaddl X9, W28, W9, X19 // UMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
- umax Z8.B, Z8.B, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- umax Z27.B, P1/M, Z27.B, Z13.B // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umax Z8.B, Z8.B, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ umax Z27.B, P1/M, Z27.B, Z13.B // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
umax V7.16B, V11.16B, V7.16B // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
umaxp V15.8H, V8.8H, V12.8H // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
umaxv B19, V7.8B // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
@@ -3532,12 +3532,12 @@ test:
umaxv H27, V5.4H // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
umaxv H11, V22.8H // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
umaxv S5, V25.4S // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
- umaxv B9, P7, Z19.B // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
- umaxv H8, P7, Z26.H // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
- umaxv S15, P2, Z28.S // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
- umaxv D11, P4, Z11.D // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
- umin Z21.S, Z21.S, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- umin Z31.S, P2/M, Z31.S, Z4.S // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ umaxv B9, P7, Z19.B // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ umaxv H8, P7, Z26.H // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ umaxv S15, P2, Z28.S // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ umaxv D11, P4, Z11.D // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ umin Z21.S, Z21.S, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ umin Z31.S, P2/M, Z31.S, Z4.S // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
umin V0.16B, V26.16B, V2.16B // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
uminp V28.4S, V16.4S, V15.4S // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
uminv B23, V21.8B // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
@@ -3545,10 +3545,10 @@ test:
uminv H6, V22.4H // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
uminv H23, V3.8H // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
uminv S29, V19.4S // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
- uminv B2, P5, Z8.B // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
- uminv H28, P0, Z0.H // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
- uminv S10, P1, Z29.S // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
- uminv D24, P5, Z29.D // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+ uminv B2, P5, Z8.B // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ uminv H28, P0, Z0.H // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ uminv S10, P1, Z29.S // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+ uminv D24, P5, Z29.D // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
umlal V22.4S, V14.4H, V0.H[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
umlal V28.2D, V31.2S, V0.S[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
umlal2 V31.4S, V7.8H, V15.H[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
@@ -3572,10 +3572,10 @@ test:
umov X20, V11.D[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
umov X29, V7.D[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
umsubl X21, W16, W28, X6 // UMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
- umulh Z20.B, P4/M, Z20.B, Z6.B // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- umulh Z30.H, P6/M, Z30.H, Z15.H // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- umulh Z11.S, P7/M, Z11.S, Z8.S // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
- umulh Z3.D, P3/M, Z3.D, Z2.D // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+ umulh Z20.B, P4/M, Z20.B, Z6.B // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ umulh Z30.H, P6/M, Z30.H, Z15.H // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ umulh Z11.S, P7/M, Z11.S, Z8.S // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+ umulh Z3.D, P3/M, Z3.D, Z2.D // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
umulh X23, X22, X19 // UMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
umull X5, W17, W23 // UMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
umull V27.4S, V1.4H, V8.H[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
@@ -3584,10 +3584,10 @@ test:
umull2 V28.2D, V21.4S, V1.S[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
umull V23.4S, V26.4H, V19.4H // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
umull2 V11.8H, V29.16B, V29.16B // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
- uqadd Z18.B, Z18.B, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- uqadd Z2.S, Z2.S, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- uqadd Z24.S, Z24.S, #56, LSL #0 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- uqadd Z6.H, Z28.H, Z5.H // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqadd Z18.B, Z18.B, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqadd Z2.S, Z2.S, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqadd Z24.S, Z24.S, #56, LSL #0 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqadd Z6.H, Z28.H, Z5.H // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqadd S0, S24, S30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
uqadd V14.2D, V22.2D, V20.2D // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
uqdecb W10 // UQDECB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -3602,30 +3602,30 @@ test:
uqdecd X1 // UQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecd X12, VL8 // UQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecd X10, VL64, MUL #10 // UQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- uqdecd Z0.D // UQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqdecd Z8.D, VL3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqdecd Z27.D, VL16, MUL #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecd Z0.D // UQDECD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqdecd Z8.D, VL3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqdecd Z27.D, VL16, MUL #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqdech W30 // UQDECH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdech W28, MUL3 // UQDECH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdech W5, VL5, MUL #8 // UQDECH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdech X2 // UQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdech X15, VL7 // UQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdech X17, VL256, MUL #10 // UQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- uqdech Z5.H // UQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqdech Z16.H, VL128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqdech Z27.H, VL128, MUL #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdech Z5.H // UQDECH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqdech Z16.H, VL128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqdech Z27.H, VL128, MUL #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqdecp W19, P5.H // UQDECP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
uqdecp X1, P1.B // UQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- uqdecp Z20.S, P0 // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ uqdecp Z20.S, P0 // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
uqdecw W17 // UQDECW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecw W11, VL256 // UQDECW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecw W13, MUL4, MUL #13 // UQDECW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecw X7 // UQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecw X28, VL32 // UQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqdecw X0, VL256, MUL #3 // UQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- uqdecw Z29.S // UQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqdecw Z22.S, VL2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqdecw Z20.S, VL2, MUL #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqdecw Z29.S // UQDECW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqdecw Z22.S, VL2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqdecw Z20.S, VL2, MUL #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqincb W2 // UQINCB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincb W21, VL128 // UQINCB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincb W0, ALL, MUL #13 // UQINCB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -3638,30 +3638,30 @@ test:
uqincd X0 // UQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincd X29, MUL4 // UQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincd X20, POW2, MUL #3 // UQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- uqincd Z29.D // UQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqincd Z4.D, VL64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqincd Z12.D, VL6, MUL #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincd Z29.D // UQINCD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqincd Z4.D, VL64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqincd Z12.D, VL6, MUL #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqinch W4 // UQINCH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqinch W23, MUL3 // UQINCH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqinch W27, VL7, MUL #3 // UQINCH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqinch X8 // UQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqinch X13, MUL3 // UQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqinch X5, MUL4, MUL #9 // UQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- uqinch Z21.H // UQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqinch Z1.H, VL8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqinch Z7.H, VL7, MUL #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqinch Z21.H // UQINCH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqinch Z1.H, VL8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqinch Z7.H, VL7, MUL #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqincp W4, P5.D // UQINCP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
uqincp X13, P5.D // UQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
- uqincp Z1.S, P0 // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+ uqincp Z1.S, P0 // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
uqincw W13 // UQINCW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincw W26, VL8 // UQINCW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincw W3, VL16, MUL #13 // UQINCW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincw X26 // UQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincw X13, VL256 // UQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
uqincw X29, VL7, MUL #6 // UQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
- uqincw Z26.S // UQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqincw Z31.S, VL5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
- uqincw Z12.S, VL7, MUL #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+ uqincw Z26.S // UQINCW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqincw Z31.S, VL5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqincw Z12.S, VL7, MUL #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqrshl S17, S5, S8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
uqrshl V25.8B, V13.8B, V23.8B // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
uqrshrn B12, H9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
@@ -3692,10 +3692,10 @@ test:
uqshrn2 V23.16B, V16.8H, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
uqshrn2 V1.8H, V12.4S, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
uqshrn2 V30.4S, V29.2D, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
- uqsub Z26.B, Z26.B, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- uqsub Z19.S, Z19.S, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- uqsub Z15.H, Z15.H, #104, LSL #8 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
- uqsub Z25.D, Z13.D, Z19.D // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+ uqsub Z26.B, Z26.B, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqsub Z19.S, Z19.S, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqsub Z15.H, Z15.H, #104, LSL #8 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uqsub Z25.D, Z13.D, Z19.D // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uqsub S16, S21, S6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
uqsub V19.4S, V0.4S, V5.4S // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
uqxtn S3, D27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
@@ -3747,12 +3747,12 @@ test:
usubl2 V12.8H, V23.16B, V15.16B // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
usubw V30.8H, V12.8H, V20.8B // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
usubw2 V2.4S, V0.4S, V30.8H // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
- uunpkhi Z26.D, Z26.S // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
- uunpklo Z10.S, Z11.H // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+ uunpkhi Z26.D, Z26.S // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+ uunpklo Z10.S, Z11.H // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
uxtb W2, W23 // UXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
- uxtb Z1.D, P2/M, Z11.D // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
- uxth Z6.S, P3/M, Z18.S // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
- uxtw Z23.D, P4/M, Z3.D // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+ uxtb Z1.D, P2/M, Z11.D // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ uxth Z6.S, P3/M, Z18.S // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+ uxtw Z23.D, P4/M, Z3.D // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
uxth W7, W14 // UXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
uxtl V1.4S, V22.4H // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
uxtl2 V14.8H, V3.16B // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
@@ -3781,12 +3781,12 @@ test:
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 376600
-# CHECK-NEXT: Total Cycles: 292826
-# CHECK-NEXT: Total uOps: 532700
+# CHECK-NEXT: Total Cycles: 308524
+# CHECK-NEXT: Total uOps: 591500
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 1.82
-# CHECK-NEXT: IPC: 1.29
+# CHECK-NEXT: uOps Per Cycle: 1.92
+# CHECK-NEXT: IPC: 1.22
# CHECK-NEXT: Block RThroughput: 854.0
# CHECK: Resources:
@@ -3801,14 +3801,17 @@ test:
# CHECK-NEXT: [8] - V1UnitM0:1
# CHECK-NEXT: [9] - V1UnitM1:1
# CHECK-NEXT: [10] - V1UnitS:2
-# CHECK-NEXT: [11] - V1UnitV:4 V1UnitV0, V1UnitV1, V1UnitV2, V1UnitV3
-# CHECK-NEXT: [12] - V1UnitV0:1
-# CHECK-NEXT: [13] - V1UnitV1:1
-# CHECK-NEXT: [14] - V1UnitV2:1
-# CHECK-NEXT: [15] - V1UnitV3:1
-# CHECK-NEXT: [16] - V1UnitV01:2 V1UnitV0, V1UnitV1
-# CHECK-NEXT: [17] - V1UnitV02:2 V1UnitV0, V1UnitV2
-# CHECK-NEXT: [18] - V1UnitV13:2 V1UnitV1, V1UnitV3
+# CHECK-NEXT: [11] - V1UnitSVE0:2 V1UnitV0, V1UnitV2
+# CHECK-NEXT: [12] - V1UnitSVE1:2 V1UnitV1, V1UnitV3
+# CHECK-NEXT: [13] - V1UnitSVE01:4 V1UnitV0, V1UnitV1, V1UnitV2, V1UnitV3
+# CHECK-NEXT: [14] - V1UnitV:4 V1UnitV0, V1UnitV1, V1UnitV2, V1UnitV3
+# CHECK-NEXT: [15] - V1UnitV0:1
+# CHECK-NEXT: [16] - V1UnitV1:1
+# CHECK-NEXT: [17] - V1UnitV2:1
+# CHECK-NEXT: [18] - V1UnitV3:1
+# CHECK-NEXT: [19] - V1UnitV01:2 V1UnitV0, V1UnitV1
+# CHECK-NEXT: [20] - V1UnitV02:2 V1UnitV0, V1UnitV2
+# CHECK-NEXT: [21] - V1UnitV13:2 V1UnitV1, V1UnitV3
# CHECK: Scheduling Info:
# CHECK-NEXT: [1]: #uOps
@@ -3820,9 +3823,9 @@ test:
# CHECK-NEXT: [7]: Instruction
# CHECK-NEXT: [8]: Comment if any
# CHECK-NEXT: [1] [2] [3] [4] [5] [6] [7] [8]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv1i64 | abs d15, d11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ABSv2i32 | abs v25.2s, v25.2s // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ABS_ZPmZ_B | abs z26.b, p6/m, z27.b // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ABSv1i64 | abs d15, d11 /* ABS <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ABSv2i32 | abs v25.2s, v25.2s // ABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ABS_ZPmZ_B | abs z26.b, p6/m, z27.b // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADCWr | adc w13, w6, w4 // ADC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADCXr | adc x8, x12, x10 // ADC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADCSWr | adcs w29, w7, w30 // ADCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -3839,22 +3842,22 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | add wsp, wsp, #3547, lsl #12 // ADD <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXri | add x7, x30, #803 // ADD <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXri | add x7, x2, #319, lsl #12 // ADD <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZI_D | add z13.d, z13.d, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZI_D | add z16.d, z16.d, #59648 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADD_ZI_D | add z13.d, z13.d, #245 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADD_ZI_D | add z16.d, z16.d, #59648 // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWrs | add w3, w2, w21, lsl #3 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrs | add w6, w21, w17, lsl #15 // ADD <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDWrs | add w28, w30, w19, asr #30 // ADD <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDXrs | add x8, x3, x28, lsl #3 // ADD <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrs | add x12, x13, x0, lsl #44 // ADD <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | ADDXrs | add x5, x20, x28, lsr #16 // ADD <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDv1i64 | add d0, d23, d21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDv4i32 | add v19.4s, v24.4s, v15.4s // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZPmZ_D | add z29.d, p5/m, z29.d, z29.d // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADD_ZZZ_H | add z10.h, z22.h, z13.h // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDHNv4i32_v4i16 | addhn v26.4h, v5.4s, v9.4s // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDHNv8i16_v16i8 | addhn2 v1.16b, v19.8h, v6.8h // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDPv2i64p | addp d1, v14.2d // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ADDPv2i32 | addp v7.2s, v1.2s, v2.2s // ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ADDv1i64 | add d0, d23, d21 // ADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ADDv4i32 | add v19.4s, v24.4s, v15.4s // ADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADD_ZPmZ_D | add z29.d, p5/m, z29.d, z29.d // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADD_ZZZ_H | add z10.h, z22.h, z13.h // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ADDHNv4i32_v4i16 | addhn v26.4h, v5.4s, v9.4s // ADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ADDHNv8i16_v16i8 | addhn2 v1.16b, v19.8h, v6.8h // ADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ADDPv2i64p | addp d1, v14.2d // ADDP <V><d>, <Vn>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ADDPv2i32 | addp v7.2s, v1.2s, v2.2s // ADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ADDPL_XXI | addpl x27, x6, #-6 // ADDPL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w17, wsp, w25 // ADDS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | adds w6, wsp, w15, uxth // ADDS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -3876,34 +3879,34 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSXrs | adds x0, x13, x4, lsl #2 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | adds x4, x7, x6, lsl #31 // ADDS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ADDSXrs | adds x9, x8, x9, asr #41 // ADDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | ADDVv8i8v | addv b0, v28.8b // ADDV B<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | ADDVv16i8v | addv b1, v26.16b // ADDV B<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | ADDVv4i16v | addv h18, v13.4h // ADDV H<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | ADDVv8i16v | addv h29, v17.8h // ADDV H<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | ADDVv4i32v | addv s22, v18.4s // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | ADDVv8i8v | addv b0, v28.8b // ADDV B<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ADDVv16i8v | addv b1, v26.16b // ADDV B<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | ADDVv4i16v | addv h18, v13.4h // ADDV H<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | ADDVv8i16v | addv h29, v17.8h // ADDV H<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | ADDVv4i32v | addv s22, v18.4s // ADDV S<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ADDVL_XXI | addvl x1, x27, #-8 // ADDVL <Xd|SP>, <Xn|SP>, #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADR | adr x3, test // ADR <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_LSL_ZZZ_D_0 | adr z26.d, [z1.d, z8.d] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_LSL_ZZZ_S_2 | adr z22.s, [z28.s, z8.s, lsl #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_SXTW_ZZZ_D_0 | adr z11.d, [z2.d, z29.d, sxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_SXTW_ZZZ_D_2 | adr z3.d, [z9.d, z9.d, sxtw #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_UXTW_ZZZ_D_0 | adr z6.d, [z7.d, z13.d, uxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ADR_UXTW_ZZZ_D_1 | adr z4.d, [z24.d, z22.d, uxtw #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADR_LSL_ZZZ_D_0 | adr z26.d, [z1.d, z8.d] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADR_LSL_ZZZ_S_2 | adr z22.s, [z28.s, z8.s, lsl #2] // ADR <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>, <mod> #<amount>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADR_SXTW_ZZZ_D_0 | adr z11.d, [z2.d, z29.d, sxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW ] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADR_SXTW_ZZZ_D_2 | adr z3.d, [z9.d, z9.d, sxtw #2] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, SXTW #<amount>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADR_UXTW_ZZZ_D_0 | adr z6.d, [z7.d, z13.d, uxtw] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW ] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ADR_UXTW_ZZZ_D_1 | adr z4.d, [z24.d, z22.d, uxtw #1] // ADR <Zd>.D, [<Zn>.D, <Zm>.D, UXTW #<amount>] \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADRP | adrp x0, test // ADRP <Xd>, <label> \\ Address generation \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWri | and wsp, w16, #0xe00 // AND <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXri | and x2, x22, #0x1e00 // AND <Xd|SP>, <Xn>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z1.b, z1.b, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.h, z7.h, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.s, z7.s, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z7.d, z7.d, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z1.b, z1.b, #0x70 // AND <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z7.h, z7.h, #0x60 // AND <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z7.s, z7.s, #0x2 // AND <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z7.d, z7.d, #0x4 // AND <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | AND_PPzPP | and p5.b, p1/z, p6.b, p4.b // AND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWrs | and w11, w14, w24 // AND <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDWrs | and w2, w21, w22, lsr #25 // AND <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXrs | and x1, x20, x29 // AND <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ANDXrs | and x8, x11, x22, asr #56 // AND <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ANDv8i8 | and v29.8b, v26.8b, v26.8b // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZPmZ_D | and z17.d, p6/m, z17.d, z12.d // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZZZ | and z9.d, z5.d, z17.d // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ANDv8i8 | and v29.8b, v26.8b, v26.8b // AND <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZPmZ_D | and z17.d, p6/m, z17.d, z12.d // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZZZ | and z9.d, z5.d, z17.d // AND <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWri | ands w14, w8, #0x70 // ANDS <Wd>, <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXri | ands x4, x10, #0x60 // ANDS <Xd>, <Xn>, #<immd> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWrs | ands w29, w28, w12 // ANDS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -3911,27 +3914,27 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXrs | ands x21, x9, x6 // ANDS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | ands x10, x27, x7, asr #20 // ANDS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ANDS_PPzPP | ands p5.b, p1/z, p2.b, p7.b // ANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | ANDV_VPZ_H | andv h7, p6, z31.h // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | ANDV_VPZ_H | andv h7, p6, z31.h // ANDV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 2 12 12 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | asr w30, w14, #5 // ASR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | asr x12, x21, #28 // ASR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_B | asr z7.b, p5/m, z7.b, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_H | asr z6.h, p6/m, z6.h, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_S | asr z28.s, p0/m, z28.s, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmI_D | asr z26.d, p5/m, z26.d, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_B | asr z10.b, z14.b, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_H | asr z23.h, z18.h, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_S | asr z29.s, z11.s, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZZI_D | asr z20.d, z26.d, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZPmI_B | asr z7.b, p5/m, z7.b, #3 // ASR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZPmI_H | asr z6.h, p6/m, z6.h, #5 // ASR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZPmI_S | asr z28.s, p0/m, z28.s, #11 // ASR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZPmI_D | asr z26.d, p5/m, z26.d, #24 // ASR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZZI_B | asr z10.b, z14.b, #3 // ASR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZZI_H | asr z23.h, z18.h, #6 // ASR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZZI_S | asr z29.s, z11.s, #6 // ASR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZZI_D | asr z20.d, z26.d, #29 // ASR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVWr | asr w3, w0, w20 // ASR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVXr | asr x7, x5, x21 // ASR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_ZPmZ_S | asr z3.s, p0/m, z3.s, z10.s // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_WIDE_ZPmZ_S | asr z9.s, p2/m, z9.s, z8.d // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASR_WIDE_ZZZ_S | asr z26.s, z21.s, z21.d // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_B | asrd z6.b, p4/m, z6.b, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_H | asrd z19.h, p3/m, z19.h, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_S | asrd z16.s, p3/m, z16.s, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRD_ZPmI_D | asrd z9.d, p6/m, z9.d, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 1 4 4 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | ASRR_ZPmZ_B | asrr z0.b, p0/m, z0.b, z19.b // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_ZPmZ_S | asr z3.s, p0/m, z3.s, z10.s // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_WIDE_ZPmZ_S | asr z9.s, p2/m, z9.s, z8.d // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASR_WIDE_ZZZ_S | asr z26.s, z21.s, z21.d // ASR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASRD_ZPmI_B | asrd z6.b, p4/m, z6.b, #2 // ASRD <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASRD_ZPmI_H | asrd z19.h, p3/m, z19.h, #6 // ASRD <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASRD_ZPmI_S | asrd z16.s, p3/m, z16.s, #2 // ASRD <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASRD_ZPmI_D | asrd z9.d, p6/m, z9.d, #12 // ASRD <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift right for divide \\ 2 4 4 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | ASRR_ZPmZ_B | asrr z0.b, p0/m, z0.b, z19.b // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVWr | asr w24, w28, w13 // ASRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ASRVXr | asr x3, x21, x24 // ASRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | at s12e1r, x28 // AT <at_op>, <Xt> \\ No description \\ No scheduling info
@@ -3964,54 +3967,54 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.le test // B.le <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.al test // B.al <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | Bcc | b.nv test // B.nv <label> \\ Branch, immed \\ 1 1 1 2.0 V1UnitB
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | BFCVT | bfcvt h6, s20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | BFCVT_ZPmZ | bfcvt z16.h, p6/m, z1.s // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | BFCVTN | bfcvtn v12.4h, v15.4s // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | BFCVTN2 | bfcvtn2 v15.8h, v13.4s // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | BFCVTNT_ZPmZ | bfcvtnt z11.h, p7/m, z24.s // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BF16DOTlanev4bf16 | bfdot v0.2s, v24.4h, v14.2h[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFDOT_ZZI | bfdot z24.s, z26.h, z2.h[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFDOTv8bf16 | bfdot v31.4s, v21.8h, v14.8h // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFDOT_ZZZ | bfdot z15.s, z3.h, z7.h // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 1 4 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | BFCVT | bfcvt h6, s20 // BFCVT <Hd>, <Sn> \\ Scalar convert, F32 to BF16 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | BFCVT_ZPmZ | bfcvt z16.h, p6/m, z1.s // BFCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | BFCVTN | bfcvtn v12.4h, v15.4s // BFCVTN <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | BFCVTN2 | bfcvtn2 v15.8h, v13.4s // BFCVTN2 <Vd>.<Ta>, <Vn>.4S \\ ASIMD convert, F32 to BF16 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | BFCVTNT_ZPmZ | bfcvtnt z11.h, p7/m, z24.s // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S \\ Convert, F32 to BF16 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BF16DOTlanev4bf16 | bfdot v0.2s, v24.4h, v14.2h[2] // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.2H[<index>] \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFDOT_ZZI | bfdot z24.s, z26.h, z2.h[0] // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Dot product \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BFDOTv8bf16 | bfdot v31.4s, v21.8h, v14.8h // BFDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFDOT_ZZZ | bfdot z15.s, z3.h, z7.h // BFDOT <Zda>.S, <Zn>.H, <Zm>.H \\ Dot product \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfi w10, w26, #31, #1 // BFI <Wd>, <Wn>, #<lsbs>, #<widths> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfi x25, x7, #8, #1 // BFI <Xd>, <Xn>, #<lsbd>, #<widthd> \\ Bit field insert/clear, unconditional \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfi w30, w26, #18, #13 // BFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfxil x15, x20, #0, #36 // BFM <Xd>, <Xn>, #<immrd>, #<immsd> \\ Bitfield move, insert \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALB_ZZZI | bfmlalb z13.s, z30.h, z0.h[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALB_ZZZ | bfmlalb z3.s, z14.h, z13.h // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALBIdx | bfmlalb v22.4s, v11.8h, v11.h[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALTIdx | bfmlalt v17.4s, v4.8h, v11.h[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALB | bfmlalb v13.4s, v5.8h, v17.8h // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | BFMLALT | bfmlalt v10.4s, v16.8h, v1.8h // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALT_ZZZI | bfmlalt z23.s, z3.h, z2.h[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | BFMLALT_ZZZ | bfmlalt z25.s, z21.h, z22.h // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 5 | 3 | 4.00 | V1UnitV | BFMMLA | bfmmla v15.4s, v28.8h, v23.8h // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 5 | 3 | 2.00 | V1UnitV, V1UnitV01 | BFMMLA_ZZZ | bfmmla z26.s, z2.h, z12.h // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 1 5 3 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFMLALB_ZZZI | bfmlalb z13.s, z30.h, z0.h[0] // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFMLALB_ZZZ | bfmlalb z3.s, z14.h, z13.h // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BFMLALBIdx | bfmlalb v22.4s, v11.8h, v11.h[5] // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BFMLALTIdx | bfmlalt v17.4s, v4.8h, v11.h[7] // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>] \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BFMLALB | bfmlalb v13.4s, v5.8h, v17.8h // BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BFMLALT | bfmlalt v10.4s, v16.8h, v1.8h // BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD multiply accumulate long \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFMLALT_ZZZI | bfmlalt z23.s, z3.h, z2.h[2] // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>] \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFMLALT_ZZZ | bfmlalt z25.s, z21.h, z22.h // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H \\ Multiply accumulate long \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 5 | 3 | 4.00 | V1UnitSVE01, V1UnitV | BFMMLA | bfmmla v15.4s, v28.8h, v23.8h // BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H \\ ASIMD matrix multiply accumulate \\ 1 5 3 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BFMMLA_ZZZ | bfmmla z26.s, z2.h, z12.h // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H \\ Matrix multiply accumulate \\ 2 5 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMWri | bfxil w27, w23, #14, #14 // BFXIL <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | BFMXri | bfxil x0, x5, #11, #22 // BFXIL <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z28.b, z28.b, #0x8f // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z18.h, z18.h, #0xff9f // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z12.s, z12.s, #0xfffffffd // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | AND_ZI | and z6.d, z6.d, #0xfffffffffffffffb // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z28.b, z28.b, #0x8f // BIC <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z18.h, z18.h, #0xff9f // BIC <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z12.s, z12.s, #0xfffffffd // BIC <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | AND_ZI | and z6.d, z6.d, #0xfffffffffffffffb // BIC <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BIC_PPzPP | bic p4.b, p4/z, p6.b, p0.b // BIC <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICWrs | bic w0, w26, w22 // BIC <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICWrs | bic w23, w10, w7, lsl #11 // BIC <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICXrs | bic x21, x20, x14 // BIC <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | BICXrs | bic x21, x3, x17, lsr #35 // BIC <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv4i16 | bic v6.4h, #217 // BIC <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv8i16 | bic v23.8h, #101 // BIC <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv2i32 | bic v24.2s, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv2i32 | bic v31.2s, #192 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BICv16i8 | bic v25.16b, v10.16b, v9.16b // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | BIC_ZPmZ_D | bic z15.d, p4/m, z15.d, z25.d // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | BIC_ZZZ | bic z7.d, z8.d, z28.d // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BICv4i16 | bic v6.4h, #217 // BIC <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BICv8i16 | bic v23.8h, #101 // BIC <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BICv2i32 | bic v24.2s, #70 // BIC <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BICv2i32 | bic v31.2s, #192 // BIC <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BICv16i8 | bic v25.16b, v10.16b, v9.16b // BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BIC_ZPmZ_D | bic z15.d, p4/m, z15.d, z25.d // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | BIC_ZZZ | bic z7.d, z8.d, z28.d // BIC <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | BICSWrs | bics w24, w1, w25 // BICS <Wd>, <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSWrs | bics w21, w0, w24, lsl #11 // BICS <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift by immed, flagset, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | BICSXrs | bics x27, x25, x10 // BICS <Xd>, <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | BICSXrs | bics x22, x6, x27, lsl #62 // BICS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BICS_PPzPP | bics p2.b, p4/z, p1.b, p7.b // BICS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BIFv8i8 | bif v0.8b, v25.8b, v4.8b // BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BITv8i8 | bit v5.8b, v12.8b, v22.8b // BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BIFv8i8 | bif v0.8b, v25.8b, v4.8b // BIF <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BITv8i8 | bit v5.8b, v12.8b, v22.8b // BIT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitB, V1UnitI, V1UnitS | BL | bl test // BL <label> \\ Branch and link, immed \\ 2 1 1 2.0 V1UnitB,V1UnitS
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitB, V1UnitI, V1UnitS | BLR | blr x11 // BLR <Xn> \\ Branch and link, register \\ 2 1 1 2.0 V1UnitB,V1UnitS
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | BR | br x17 // BR <Xn> \\ Branch, register \\ 1 1 1 2.0 V1UnitB
@@ -4026,7 +4029,7 @@ test:
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKPAS_PPzPP | brkpas p2.b, p5/z, p1.b, p3.b // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | BRKPB_PPzPP | brkpb p1.b, p0/z, p7.b, p6.b // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | BRKPBS_PPzPP | brkpbs p7.b, p1/z, p6.b, p1.b // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Loop control, based on predicate and flag setting \\ 1 3 3 0.5 V1UnitM0[2]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | BSLv16i8 | bsl v27.16b, v13.16b, v21.16b // BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | BSLv16i8 | bsl v27.16b, v13.16b, v21.16b // BSL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD bitwise insert \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBNZW | cbnz w21, test // CBNZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBNZX | cbnz x26, test // CBNZ <Xt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | CBZW | cbz w6, test // CBZ <Wt>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
@@ -4043,42 +4046,42 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINCXr | cinc x2, x1, pl // CINC <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVWr | cinv w9, w12, ge // CINV <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSINVXr | cinv x9, x30, mi // CINV <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_VPZ_B | clasta b11, p4, b11, z21.b // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_RPZ_B | clasta w8, p0, w8, z6.b // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTA_ZPZ_S | clasta z25.s, p1, z25.s, z14.s // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_VPZ_D | clastb d6, p7, d6, z31.d // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_RPZ_B | clastb w28, p6, w28, z12.b // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 2 9 9 1.0 V1UnitM0,V1UnitV1
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | CLASTB_ZPZ_H | clastb z27.h, p6, z27.h, z22.h // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | CLASTA_VPZ_B | clasta b11, p4, b11, z21.b // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 3 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | CLASTA_RPZ_B | clasta w8, p0, w8, z6.b // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 3 9 9 1.0 V1UnitM0,V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | CLASTA_ZPZ_S | clasta z25.s, p1, z25.s, z14.s // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | CLASTB_VPZ_D | clastb d6, p7, d6, z31.d // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 3 | 9 | 9 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | CLASTB_RPZ_B | clastb w28, p6, w28, z12.b // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> \\ Conditional extract operations, scalar form \\ 3 9 9 1.0 V1UnitM0,V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | CLASTB_ZPZ_H | clastb z27.h, p6, z27.h, z22.h // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | CLREX | clrex // CLREX \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | CLREX | clrex #12 // CLREX #<imm> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CLSv8i8 | cls v5.8b, v22.8b // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CLSv8i8 | cls v5.8b, v22.8b // CLS <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLSWr | cls w25, w0 // CLS <Wd>, <Wn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLSXr | cls x22, x6 // CLS <Xd>, <Xn> \\ Count leading \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CLS_ZPmZ_D | cls z28.d, p3/m, z2.d // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CLZv8i16 | clz v24.8h, v30.8h // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CLS_ZPmZ_D | cls z28.d, p3/m, z2.d // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CLZv8i16 | clz v24.8h, v30.8h // CLZ <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLZWr | clz w26, w27 // CLZ <Wd>, <Wn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CLZXr | clz x4, x0 // CLZ <Xd>, <Xn> \\ Count leading zeros \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CLZ_ZPmZ_S | clz z3.s, p3/m, z18.s // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv1i64 | cmeq d26, d5, d25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv8i16 | cmeq v9.8h, v16.8h, v24.8h // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv1i64rz | cmeq d7, d26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMEQv4i16rz | cmeq v14.4h, v18.4h, #0 // CMEQ <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv1i64 | cmge d26, d21, d28 // CMGE <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv8i16 | cmge v22.8h, v16.8h, v3.8h // CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv1i64rz | cmge d30, d12, #0 // CMGE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGEv16i8rz | cmge v22.16b, v30.16b, #0 // CMGE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv1i64 | cmgt d23, d25, d12 // CMGT <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv2i64 | cmgt v3.2d, v29.2d, v11.2d // CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv1i64rz | cmgt d28, d14, #0 // CMGT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMGTv2i32rz | cmgt v22.2s, v10.2s, #0 // CMGT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHIv1i64 | cmhi d29, d16, d5 // CMHI <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHIv4i16 | cmhi v28.4h, v25.4h, v21.4h // CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHSv1i64 | cmhs d5, d3, d12 // CMHS <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMHSv8i8 | cmhs v6.8b, v31.8b, v12.8b // CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLEv1i64rz | cmle d14, d21, #0 // CMLE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLEv2i32rz | cmle v21.2s, v19.2s, #0 // CMLE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLTv1i64rz | cmlt d21, d24, #0 // CMLT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMLTv4i16rz | cmlt v26.4h, v12.4h, #0 // CMLT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CLZ_ZPmZ_S | clz z3.s, p3/m, z18.s // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMEQv1i64 | cmeq d26, d5, d25 // CMEQ <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMEQv8i16 | cmeq v9.8h, v16.8h, v24.8h // CMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMEQv1i64rz | cmeq d7, d26, #0 // CMEQ <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMEQv4i16rz | cmeq v14.4h, v18.4h, #0 // CMEQ <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGEv1i64 | cmge d26, d21, d28 // CMGE <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGEv8i16 | cmge v22.8h, v16.8h, v3.8h // CMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGEv1i64rz | cmge d30, d12, #0 // CMGE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGEv16i8rz | cmge v22.16b, v30.16b, #0 // CMGE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGTv1i64 | cmgt d23, d25, d12 // CMGT <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGTv2i64 | cmgt v3.2d, v29.2d, v11.2d // CMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGTv1i64rz | cmgt d28, d14, #0 // CMGT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMGTv2i32rz | cmgt v22.2s, v10.2s, #0 // CMGT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMHIv1i64 | cmhi d29, d16, d5 // CMHI <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMHIv4i16 | cmhi v28.4h, v25.4h, v21.4h // CMHI <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMHSv1i64 | cmhs d5, d3, d12 // CMHS <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMHSv8i8 | cmhs v6.8b, v31.8b, v12.8b // CMHS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMLEv1i64rz | cmle d14, d21, #0 // CMLE <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMLEv2i32rz | cmle v21.2s, v19.2s, #0 // CMLE <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMLTv1i64rz | cmlt d21, d24, #0 // CMLT <V><d>, <V><n>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMLTv4i16rz | cmlt v26.4h, v12.4h, #0 // CMLT <Vd>.<T>, <Vn>.<T>, #0 \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w7 // CMN <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w8, sxtb // CMN <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ADDSWrx | cmn wsp, w3, uxtb #3 // CMN <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -4117,43 +4120,43 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | cmp x27, x10, lsl #1 // CMP <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | cmp x18, x12, lsl #14 // CMP <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | cmp x6, x7, lsr #0 // CMP <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZI_H | cmpeq p2.h, p0/z, z26.h, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZI_B | cmpge p1.b, p4/z, z28.b, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZI_B | cmpgt p1.b, p0/z, z13.b, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZI_D | cmphi p1.d, p3/z, z23.d, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZI_D | cmphs p7.d, p5/z, z23.d, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLE_PPzZI_B | cmple p5.b, p2/z, z9.b, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLO_PPzZI_S | cmplo p3.s, p5/z, z18.s, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLS_PPzZI_D | cmpls p6.d, p6/z, z31.d, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLT_PPzZI_H | cmplt p0.h, p6/z, z29.h, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_PPzZI_S | cmpne p5.s, p4/z, z18.s, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZZ_S | cmpeq p6.s, p5/z, z2.s, z9.s // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZZ_S | cmpge p7.s, p4/z, z15.s, z15.s // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_H | cmpgt p2.h, p4/z, z26.h, z11.h // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_S | cmphi p0.s, p4/z, z8.s, z4.s // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZZ_D | cmphs p1.d, p6/z, z26.d, z15.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_PPzZZ_B | cmpne p4.b, p3/z, z21.b, z16.b // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPEQ_PPzZZ_D | cmpeq p2.d, p3/z, z13.d, z18.d // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_WIDE_PPzZZ_B | cmpge p2.b, p3/z, z3.b, z16.d // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_WIDE_PPzZZ_H | cmpgt p2.h, p2/z, z28.h, z30.d // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_WIDE_PPzZZ_H | cmphi p0.h, p5/z, z30.h, z16.d // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_WIDE_PPzZZ_H | cmphs p7.h, p2/z, z1.h, z26.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLE_WIDE_PPzZZ_B | cmple p7.b, p7/z, z3.b, z13.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_D | cmphi p6.d, p2/z, z16.d, z16.d // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPLS_WIDE_PPzZZ_H | cmpls p3.h, p2/z, z12.h, z26.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_D | cmpgt p0.d, p4/z, z26.d, z29.d // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPNE_WIDE_PPzZZ_S | cmpne p0.s, p4/z, z30.s, z8.d // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGE_PPzZZ_D | cmpge p1.d, p3/z, z26.d, z2.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHI_PPzZZ_B | cmphi p7.b, p0/z, z25.b, z4.b // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPHS_PPzZZ_D | cmphs p4.d, p4/z, z14.d, z2.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | CMPGT_PPzZZ_S | cmpgt p2.s, p2/z, z21.s, z31.s // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 2 4 4 1.0 V1UnitV0,V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMTSTv1i64 | cmtst d10, d6, d5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CMTSTv2i64 | cmtst v13.2d, v13.2d, v13.2d // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPEQ_PPzZI_H | cmpeq p2.h, p0/z, z26.h, #-8 // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGE_PPzZI_B | cmpge p1.b, p4/z, z28.b, #-6 // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGT_PPzZI_B | cmpgt p1.b, p0/z, z13.b, #14 // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHI_PPzZI_D | cmphi p1.d, p3/z, z23.d, #12 // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHS_PPzZI_D | cmphs p7.d, p5/z, z23.d, #114 // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPLE_PPzZI_B | cmple p5.b, p2/z, z9.b, #9 // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPLO_PPzZI_S | cmplo p3.s, p5/z, z18.s, #87 // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPLS_PPzZI_D | cmpls p6.d, p6/z, z31.d, #56 // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPLT_PPzZI_H | cmplt p0.h, p6/z, z29.h, #-13 // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPNE_PPzZI_S | cmpne p5.s, p4/z, z18.s, #15 // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPEQ_PPzZZ_S | cmpeq p6.s, p5/z, z2.s, z9.s // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGE_PPzZZ_S | cmpge p7.s, p4/z, z15.s, z15.s // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGT_PPzZZ_H | cmpgt p2.h, p4/z, z26.h, z11.h // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHI_PPzZZ_S | cmphi p0.s, p4/z, z8.s, z4.s // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHS_PPzZZ_D | cmphs p1.d, p6/z, z26.d, z15.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPNE_PPzZZ_B | cmpne p4.b, p3/z, z21.b, z16.b // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPEQ_PPzZZ_D | cmpeq p2.d, p3/z, z13.d, z18.d // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGE_WIDE_PPzZZ_B | cmpge p2.b, p3/z, z3.b, z16.d // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGT_WIDE_PPzZZ_H | cmpgt p2.h, p2/z, z28.h, z30.d // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHI_WIDE_PPzZZ_H | cmphi p0.h, p5/z, z30.h, z16.d // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHS_WIDE_PPzZZ_H | cmphs p7.h, p2/z, z1.h, z26.d // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPLE_WIDE_PPzZZ_B | cmple p7.b, p7/z, z3.b, z13.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHI_PPzZZ_D | cmphi p6.d, p2/z, z16.d, z16.d // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPLS_WIDE_PPzZZ_H | cmpls p3.h, p2/z, z12.h, z26.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGT_PPzZZ_D | cmpgt p0.d, p4/z, z26.d, z29.d // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPNE_WIDE_PPzZZ_S | cmpne p0.s, p4/z, z30.s, z8.d // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGE_PPzZZ_D | cmpge p1.d, p3/z, z26.d, z2.d // CMPLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHI_PPzZZ_B | cmphi p7.b, p0/z, z25.b, z4.b // CMPLO <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPHS_PPzZZ_D | cmphs p4.d, p4/z, z14.d, z2.d // CMPLS <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | CMPGT_PPzZZ_S | cmpgt p2.s, p2/z, z21.s, z31.s // CMPLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Compare and set flags \\ 3 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2],V1UnitM0
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMTSTv1i64 | cmtst d10, d6, d5 // CMTST <V><d>, <V><n>, <V><m> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CMTSTv2i64 | cmtst v13.2d, v13.2d, v13.2d // CMTST <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD compare \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGWr | cneg w3, w17, hi // CNEG <Wd>, <Wn>, <cond> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | CSNEGXr | cneg x26, x8, lo // CNEG <Xd>, <Xn>, <cond> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CNOT_ZPmZ_S | cnot z7.s, p7/m, z8.s // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | CNTv16i8 | cnt v12.16b, v14.16b // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CNT_ZPmZ_H | cnt z26.h, p0/m, z27.h // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CNOT_ZPmZ_S | cnot z7.s, p7/m, z8.s // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | CNTv16i8 | cnt v12.16b, v14.16b // CNT <Vd>.<T>, <Vn>.<T> \\ ASIMD count \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CNT_ZPmZ_H | cnt z26.h, p0/m, z27.h // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x18 // CNTB <Xd> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x9, vl128 // CNTB <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTB_XPiI | cntb x28, vl8, mul #13 // CNTB <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -4167,16 +4170,16 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x23, vl3 // CNTW <Xd>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTW_XPiI | cntw x6, vl16, mul #11 // CNTW <Xd>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CNTP_XPP_S | cntp x22, p1, p2.s // CNTP <Xd>, <Pg>, <Pn>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | COMPACT_ZPZ_S | compact z17.s, p1, z18.s // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmV_B | mov z13.b, p0/m, b6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_B | mov z3.b, p6/m, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_S | mov z11.s, p5/m, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_H | mov z0.h, p0/m, #-11 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_B | mov z5.b, p1/z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_H | mov z12.h, p1/z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z25.d, p3/z, #-20736 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_H | mov z24.h, p0/m, w19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_S | mov z23.s, p2/m, wsp // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | COMPACT_ZPZ_S | compact z17.s, p1, z18.s // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> \\ Conditional extract operations, SIMD&FP scalar and vector forms \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmV_B | mov z13.b, p0/m, b6 // CPY <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_B | mov z3.b, p6/m, #-118 // CPY <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_S | mov z11.s, p5/m, #-62 // CPY <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_H | mov z0.h, p0/m, #-11 // CPY <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPzI_B | mov z5.b, p1/z, #-90 // CPY <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPzI_H | mov z12.h, p1/z, #-118 // CPY <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPzI_D | mov z25.d, p3/z, #-20736 // CPY <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 3 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmR_H | mov z24.h, p0/m, w19 // CPY <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 3 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmR_S | mov z23.s, p2/m, wsp // CPY <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Brr | crc32b w27, w12, w15 // CRC32B <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Hrr | crc32h w3, w15, w21 // CRC32H <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | CRC32Wrr | crc32w w9, w18, w24 // CRC32W <Wd>, <Wn>, <Wm> \\ CRC checksum ops \\ 1 2 1 1.0 V1UnitM0
@@ -4219,718 +4222,718 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x27 // DECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x18, vl32 // DECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECW_XPiI | decw x29, vl6, mul #3 // DECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECD_ZPiI | decd z19.d // DECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECD_ZPiI | decd z22.d, mul3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECD_ZPiI | decd z1.d, vl128, mul #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECH_ZPiI | dech z23.h // DECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECH_ZPiI | dech z29.h, vl5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECH_ZPiI | dech z28.h, vl64, mul #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECW_ZPiI | decw z8.s // DECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECW_ZPiI | decw z4.s, vl64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DECW_ZPiI | decw z27.s, vl4, mul #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECD_ZPiI | decd z19.d // DECD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECD_ZPiI | decd z22.d, mul3 // DECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECD_ZPiI | decd z1.d, vl128, mul #11 // DECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECH_ZPiI | dech z23.h // DECH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECH_ZPiI | dech z29.h, vl5 // DECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECH_ZPiI | dech z28.h, vl64, mul #16 // DECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECW_ZPiI | decw z8.s // DECW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECW_ZPiI | decw z4.s, vl64 // DECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DECW_ZPiI | decw z27.s, vl4, mul #10 // DECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DECP_XP_B | decp x6, p6.b // DECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | DECP_ZP_H | decp z22.h, p1.h // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE01[2], V1UnitV[2] | DECP_ZP_H | decp z22.h, p1.h // DECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DMB | dmb sy // DMB <option> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DMB | dmb nshst // DMB #<imm> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | DRPS | drps // DRPS \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi8 | mov b15, v25.b[12] // DUP B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi16 | mov h2, v31.h[5] // DUP H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi32 | mov s10, v2.s[1] // DUP S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi64 | mov d24, v7.d[1] // DUP D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv8i8lane | dup v25.8b, v21.b[4] // DUP <Vd>.<Tb>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv8i16lane | dup v28.8h, v29.h[1] // DUP <Vd>.<Th>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv4i32lane | dup v24.4s, v9.s[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPv2i64lane | dup v20.2d, v3.d[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi8 | mov b15, v25.b[12] // DUP B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi16 | mov h2, v31.h[5] // DUP H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi32 | mov s10, v2.s[1] // DUP S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi64 | mov d24, v7.d[1] // DUP D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPv8i8lane | dup v25.8b, v21.b[4] // DUP <Vd>.<Tb>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPv8i16lane | dup v28.8h, v29.h[1] // DUP <Vd>.<Th>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPv4i32lane | dup v24.4s, v9.s[3] // DUP <Vd>.<Ts>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPv2i64lane | dup v20.2d, v3.d[0] // DUP <Vd>.<Td>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUPv4i32gpr | dup v19.4s, w27 // DUP <Vd>.<T>, <R><n> \\ ASIMD duplicate, gen reg \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z30.b, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z15.h, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_D | mov z22.d, #-14 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_B | mov z2.b, z26.b[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_H | mov z23.h, z22.h[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z29.s, z30.s[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_D | mov z4.d, d7 // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_B | mov z30.b, #16 // DUP <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_H | mov z15.h, #105 // DUP <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_D | mov z22.d, #-14 // DUP <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_B | mov z2.b, z26.b[27] // DUP <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_H | mov z23.h, z22.h[2] // DUP <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_S | mov z29.s, z30.s[15] // DUP <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_D | mov z4.d, d7 // DUP <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_D | mov z25.d, x28 // DUP <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_S | mov z18.s, wsp // DUP <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z18.b, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z12.h, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z16.s, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUPM_ZI | dupm z16.d, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUPM_ZI | dupm z18.b, #0x70 // DUPM <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUPM_ZI | dupm z12.h, #0x60 // DUPM <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUPM_ZI | dupm z16.s, #0x2 // DUPM <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUPM_ZI | dupm z16.d, #0x4 // DUPM <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONWrs | eon w29, w4, w19 // EON <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONWrs | eon w14, w24, w28, asr #14 // EON <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONXrs | eon x19, x12, x2 // EON <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EONXrs | eon x23, x23, x23, asr #41 // EON <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z7.b, z7.b, #0x8f // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z3.h, z3.h, #0xff9f // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z2.s, z2.s, #0xfffffffd // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z24.d, z24.d, #0xfffffffffffffffb // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z7.b, z7.b, #0x8f // EON <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z3.h, z3.h, #0xff9f // EON <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z2.s, z2.s, #0xfffffffd // EON <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z24.d, z24.d, #0xfffffffffffffffb // EON <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWri | eor wsp, w4, #0xe00 // EOR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXri | eor x27, x25, #0x1e00 // EOR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z19.b, z19.b, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z18.h, z18.h, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z10.s, z10.s, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZI | eor z29.d, z29.d, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z19.b, z19.b, #0x70 // EOR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z18.h, z18.h, #0x60 // EOR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z10.s, z10.s, #0x2 // EOR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZI | eor z29.d, z29.d, #0x4 // EOR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | EOR_PPzPP | eor p6.b, p7/z, p3.b, p5.b // EOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWrs | eor w8, w27, w2 // EOR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORWrs | eor w8, w7, w29, asr #30 // EOR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXrs | eor x22, x16, x6 // EOR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EORXrs | eor x0, x23, x30, lsl #11 // EOR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EORv16i8 | eor v8.16b, v10.16b, v19.16b // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZPmZ_H | eor z8.h, p3/m, z8.h, z14.h // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | EOR_ZZZ | eor z30.d, z26.d, z20.d // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | EORv16i8 | eor v8.16b, v10.16b, v19.16b // EOR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZPmZ_H | eor z8.h, p3/m, z8.h, z14.h // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | EOR_ZZZ | eor z30.d, z26.d, z20.d // EOR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | EORS_PPzPP | eors p1.b, p0/z, p3.b, p1.b // EORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | EORV_VPZ_H | eorv h17, p1, z15.h // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | EORV_VPZ_H | eorv h17, p1, z15.h // EORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 2 12 12 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | ERET | eret // ERET \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | esb // ESB \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EXTv8i8 | ext v12.8b, v22.8b, v31.8b, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | EXTv16i8 | ext v17.16b, v18.16b, v8.16b, #10 // EXT <Vd>.16B, <Vn>.16B, <Vm>.16B, #<index16> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | EXTv8i8 | ext v12.8b, v22.8b, v31.8b, #6 // EXT <Vd>.8B, <Vn>.8B, <Vm>.8B, #<index8> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | EXTv16i8 | ext v17.16b, v18.16b, v8.16b, #10 // EXT <Vd>.16B, <Vn>.16B, <Vm>.16B, #<index16> \\ ASIMD extract \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRWrri | ror w19, w20, #16 // EXTR <Wd>, <Wn>, <Wn>, #<lsbs> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | EXTRWrri | extr w27, w4, w5, #23 // EXTR <Wd>, <Wn>, <Wm>, #<lsbs> \\ Bitfield extract, two regs \\ 1 3 3 2.0 V1UnitM
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRXrri | ror x25, x22, #62 // EXTR <Xd>, <Xn>, <Xn>, #<lsbd> \\ Bitfield extract, one reg \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | EXTRXrri | extr x0, x12, x13, #17 // EXTR <Xd>, <Xn>, <Xm>, #<lsbd> \\ Bitfield extract, two regs \\ 1 3 3 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABD16 | fabd h27, h20, h17 // FABD <Hd>, <Hn>, <Hm> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABD32 | fabd s16, s29, s6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABDv8f16 | fabd v13.8h, v28.8h, v12.8h // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABDv4f32 | fabd v12.4s, v4.4s, v31.4s // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FABD_ZPmZ_H | fabd z11.h, p6/m, z11.h, z5.h // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSHr | fabs h25, h7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSSr | fabs s17, s12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSDr | fabs d30, d8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSv4f32 | fabs v16.4s, v31.4s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FABSv2f32 | fabs v17.2s, v28.2s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FABS_ZPmZ_S | fabs z26.s, p7/m, z24.s // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGE_PPzZZ_H | facge p0.h, p5/z, z15.h, z18.h // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGT_PPzZZ_S | facgt p7.s, p7/z, z10.s, z4.s // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGE16 | facge h24, h26, h29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGE64 | facge d25, d24, d7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGEv4f16 | facge v25.4h, v16.4h, v11.4h // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGEv2f32 | facge v19.2s, v24.2s, v5.2s // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGT16 | facgt h0, h4, h10 // FACGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGT32 | facgt s29, s3, s2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGTv8f16 | facgt v22.8h, v14.8h, v31.8h // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FACGTv4f32 | facgt v22.4s, v8.4s, v2.4s // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGE_PPzZZ_H | facge p7.h, p5/z, z27.h, z22.h // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FACGT_PPzZZ_H | facgt p5.h, p5/z, z16.h, z31.h // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZPmI_H | fadd z4.h, p7/m, z4.h, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDHrr | fadd h23, h27, h22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDSrr | fadd s1, s23, s27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDDrr | fadd d16, d15, d21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDv2f64 | fadd v7.2d, v30.2d, v20.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDv2f64 | fadd v16.2d, v13.2d, v11.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZPmZ_H | fadd z26.h, p4/m, z26.h, z1.h // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FADD_ZZZ_S | fadd z23.s, z7.s, z16.s // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 19 | 19 | 0.06 | V1UnitV[18], V1UnitV0[18], V1UnitV01[18], V1UnitV02[18] | FADDA_VPZ_H | fadda h8, p3, h8, z28.h // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 1 19 19 0.06 V1UnitV0[18]
-# CHECK-NEXT: 1 | 11 | 11 | 0.10 | V1UnitV[10], V1UnitV0[10], V1UnitV01[10], V1UnitV02[10] | FADDA_VPZ_S | fadda s11, p6, s11, z1.s // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 1 11 11 0.1 V1UnitV0[10]
-# CHECK-NEXT: 1 | 8 | 8 | 0.67 | V1UnitV[3], V1UnitV01[3] | FADDA_VPZ_D | fadda d27, p4, d27, z27.d // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 1 8 8 0.67 V1UnitV01[3]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2i16p | faddp h10, v19.2h // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2i64p | faddp d11, v28.2d // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv2f64 | faddp v16.2d, v11.2d, v5.2d // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FADDPv4f32 | faddp v16.4s, v11.4s, v18.4s // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FADDV_VPZ_H | faddv h21, p2, z3.h // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
-# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FADDV_VPZ_S | faddv s16, p2, z25.s // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
-# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FADDV_VPZ_D | faddv d18, p4, z7.d // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FCADD_ZPmZ_H | fcadd z29.h, p2/m, z29.h, z15.h, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPHrr | fccmp h31, h3, #11, hs // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPSrr | fccmp s5, s6, #0, lo // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPDrr | fccmp d17, d15, #0, ne // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEHrr | fccmpe h6, h1, #12, ne // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPESrr | fccmpe s16, s13, #10, vs // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEDrr | fccmpe d17, d14, #15, ls // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMEQ_PPzZZ_D | fcmeq p7.d, p1/z, z23.d, z21.d // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZZ_H | fcmge p6.h, p1/z, z19.h, z10.h // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZZ_S | fcmgt p5.s, p2/z, z29.s, z5.s // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMNE_PPzZZ_D | fcmne p5.d, p0/z, z22.d, z15.d // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMUO_PPzZZ_D | fcmuo p0.d, p2/z, z15.d, z23.d // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMEQ_PPzZ0_D | fcmeq p4.d, p5/z, z19.d, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZ0_D | fcmge p0.d, p5/z, z10.d, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZ0_D | fcmgt p6.d, p1/z, z8.d, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMLE_PPzZ0_D | fcmle p2.d, p4/z, z26.d, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMLT_PPzZ0_D | fcmlt p5.d, p5/z, z23.d, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMNE_PPzZ0_H | fcmne p2.h, p3/z, z7.h, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQ16 | fcmeq h30, h6, h1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQ32 | fcmeq s17, s0, s21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2f32 | fcmeq v19.2s, v31.2s, v19.2s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv4f32 | fcmeq v12.4s, v11.4s, v26.4s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv1i16rz | fcmeq h19, h23, #0.0 // FCMEQ <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv1i32rz | fcmeq s25, s18, #0.0 // FCMEQ <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2i32rz | fcmeq v8.2s, v16.2s, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMEQv2i64rz | fcmeq v18.2d, v17.2d, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGE16 | fcmge h1, h16, h12 // FCMGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGE64 | fcmge d29, d9, d3 // FCMGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv8f16 | fcmge v20.8h, v19.8h, v22.8h // FCMGE <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv2f64 | fcmge v17.2d, v11.2d, v13.2d // FCMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv1i16rz | fcmge h10, h23, #0.0 // FCMGE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv1i64rz | fcmge d5, d17, #0.0 // FCMGE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv4i16rz | fcmge v18.4h, v27.4h, #0.0 // FCMGE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGEv2i32rz | fcmge v17.2s, v11.2s, #0.0 // FCMGE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGT16 | fcmgt h4, h5, h0 // FCMGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGT32 | fcmgt s13, s20, s3 // FCMGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv8f16 | fcmgt v24.8h, v24.8h, v28.8h // FCMGT <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv4f32 | fcmgt v19.4s, v20.4s, v13.4s // FCMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv1i16rz | fcmgt h0, h18, #0.0 // FCMGT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv1i64rz | fcmgt d30, d23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv8i16rz | fcmgt v0.8h, v11.8h, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMGTv2i64rz | fcmgt v19.2d, v31.2d, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZZZI_H | fcmla z20.h, z12.h, z4.h[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZZZI_S | fcmla z1.s, z27.s, z6.s[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 5 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCMLA_ZPmZZ_S | fcmla z25.s, p3/m, z13.s, z23.s, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 1 5 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGE_PPzZZ_S | fcmge p5.s, p3/z, z12.s, z28.s // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv1i16rz | fcmle h18, h28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv1i64rz | fcmle d18, d16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv8i16rz | fcmle v16.8h, v11.8h, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLEv4i32rz | fcmle v22.4s, v30.4s, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMGT_PPzZZ_S | fcmgt p1.s, p1/z, z24.s, z13.s // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv1i16rz | fcmlt h23, h7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv1i64rz | fcmlt d22, d28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv4i16rz | fcmlt v8.4h, v2.4h, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FCMLTv2i64rz | fcmlt v7.2d, v16.2d, #0.0 // FCMLT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHrr | fcmp h5, h21 // FCMP <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHri | fcmp h5, #0.0 // FCMP <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSrr | fcmp s7, s0 // FCMP <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSri | fcmp s28, #0.0 // FCMP <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDrr | fcmp d1, d27 // FCMP <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDri | fcmp d16, #0.0 // FCMP <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHrr | fcmpe h22, h21 // FCMPE <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHri | fcmpe h13, #0.0 // FCMPE <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESrr | fcmpe s11, s29 // FCMPE <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESri | fcmpe s15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDrr | fcmpe d27, d22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDri | fcmpe d9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCPY_ZPmI_H | fmov z2.h, p7/m, #0.50000000 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELHrrr | fcsel h26, h2, h11, hs // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELSrrr | fcsel s5, s1, s4, vc // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCSELDrrr | fcsel d14, d0, d19, eq // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTSHr | fcvt s13, h13 // FCVT <Sd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTDHr | fcvt d10, h6 // FCVT <Dd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTHSr | fcvt h1, s1 // FCVT <Hd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTDSr | fcvt d9, s23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTHDr | fcvt h17, d16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTSDr | fcvt s31, d27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVT_ZPmZ_HtoS | fcvt z0.s, p1/m, z4.h // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_HtoD | fcvt z6.d, p0/m, z17.h // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVT_ZPmZ_StoH | fcvt z7.h, p7/m, z5.s // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_StoD | fcvt z11.d, p2/m, z18.s // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_DtoH | fcvt z26.h, p0/m, z30.d // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVT_ZPmZ_DtoS | fcvt z13.s, p2/m, z3.d // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWHr | fcvtas w23, h3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXHr | fcvtas x14, h29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWSr | fcvtas w0, s13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXSr | fcvtas x23, s15 // FCVTAS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWDr | fcvtas w1, d31 // FCVTAS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXDr | fcvtas x2, d3 // FCVTAS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1f16 | fcvtas h27, h24 // FCVTAS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1i32 | fcvtas s16, s0 // FCVTAS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv1i64 | fcvtas d14, d7 // FCVTAS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv4f16 | fcvtas v5.4h, v16.4h // FCVTAS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTASv8f16 | fcvtas v13.8h, v30.8h // FCVTAS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv2f32 | fcvtas v12.2s, v1.2s // FCVTAS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTASv4f32 | fcvtas v9.4s, v31.4s // FCVTAS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTASv2f64 | fcvtas v2.2d, v22.2d // FCVTAS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWHr | fcvtau w13, h27 // FCVTAU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXHr | fcvtau x8, h12 // FCVTAU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWSr | fcvtau w20, s10 // FCVTAU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXSr | fcvtau x27, s22 // FCVTAU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWDr | fcvtau w6, d26 // FCVTAU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXDr | fcvtau x16, d13 // FCVTAU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1f16 | fcvtau h6, h29 // FCVTAU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1i32 | fcvtau s23, s7 // FCVTAU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv1i64 | fcvtau d1, d26 // FCVTAU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv4f16 | fcvtau v12.4h, v13.4h // FCVTAU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTAUv8f16 | fcvtau v21.8h, v0.8h // FCVTAU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv2f32 | fcvtau v31.2s, v6.2s // FCVTAU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTAUv4f32 | fcvtau v29.4s, v26.4s // FCVTAU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTAUv2f64 | fcvtau v9.2d, v7.2d // FCVTAU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTLv4i16 | fcvtl v30.4s, v4.4h // FCVTL <Vd>.4S, <Vn>.4H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTLv2i32 | fcvtl v28.2d, v13.2s // FCVTL <Vd>.2D, <Vn>.2S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTLv8i16 | fcvtl2 v14.4s, v29.8h // FCVTL2 <Vd>.4S, <Vn>.8H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTLv4i32 | fcvtl2 v0.2d, v9.4s // FCVTL2 <Vd>.2D, <Vn>.4S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWHr | fcvtms w15, h1 // FCVTMS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXHr | fcvtms x5, h2 // FCVTMS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWSr | fcvtms w1, s16 // FCVTMS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXSr | fcvtms x27, s22 // FCVTMS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWDr | fcvtms w18, d21 // FCVTMS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXDr | fcvtms x6, d26 // FCVTMS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1f16 | fcvtms h19, h29 // FCVTMS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1i32 | fcvtms s30, s14 // FCVTMS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv1i64 | fcvtms d8, d20 // FCVTMS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv4f16 | fcvtms v27.4h, v7.4h // FCVTMS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTMSv8f16 | fcvtms v26.8h, v11.8h // FCVTMS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv2f32 | fcvtms v13.2s, v2.2s // FCVTMS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMSv4f32 | fcvtms v18.4s, v21.4s // FCVTMS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMSv2f64 | fcvtms v15.2d, v16.2d // FCVTMS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWHr | fcvtmu w20, h6 // FCVTMU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXHr | fcvtmu x7, h18 // FCVTMU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWSr | fcvtmu w24, s19 // FCVTMU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXSr | fcvtmu x7, s15 // FCVTMU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWDr | fcvtmu w16, d16 // FCVTMU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXDr | fcvtmu x1, d18 // FCVTMU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1f16 | fcvtmu h20, h13 // FCVTMU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1i32 | fcvtmu s28, s25 // FCVTMU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv1i64 | fcvtmu d3, d27 // FCVTMU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv4f16 | fcvtmu v18.4h, v2.4h // FCVTMU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTMUv8f16 | fcvtmu v10.8h, v11.8h // FCVTMU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv2f32 | fcvtmu v27.2s, v14.2s // FCVTMU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTMUv4f32 | fcvtmu v31.4s, v4.4s // FCVTMU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTMUv2f64 | fcvtmu v6.2d, v26.2d // FCVTMU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNv4i16 | fcvtn v4.4h, v22.4s // FCVTN <Vd>.4H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNv2i32 | fcvtn v14.2s, v2.2d // FCVTN <Vd>.2S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNv8i16 | fcvtn2 v0.8h, v30.4s // FCVTN2 <Vd>.8H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNv4i32 | fcvtn2 v21.4s, v13.2d // FCVTN2 <Vd>.4S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWHr | fcvtns w19, h15 // FCVTNS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXHr | fcvtns x20, h0 // FCVTNS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWSr | fcvtns w10, s5 // FCVTNS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXSr | fcvtns x14, s12 // FCVTNS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWDr | fcvtns w30, d2 // FCVTNS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXDr | fcvtns x0, d12 // FCVTNS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1f16 | fcvtns h16, h25 // FCVTNS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1i32 | fcvtns s23, s19 // FCVTNS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv1i64 | fcvtns d30, d1 // FCVTNS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv4f16 | fcvtns v28.4h, v19.4h // FCVTNS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTNSv8f16 | fcvtns v19.8h, v19.8h // FCVTNS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv2f32 | fcvtns v20.2s, v4.2s // FCVTNS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNSv4f32 | fcvtns v28.4s, v29.4s // FCVTNS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNSv2f64 | fcvtns v21.2d, v31.2d // FCVTNS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWHr | fcvtnu w12, h3 // FCVTNU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXHr | fcvtnu x23, h27 // FCVTNU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWSr | fcvtnu w4, s23 // FCVTNU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXSr | fcvtnu x5, s28 // FCVTNU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWDr | fcvtnu w4, d11 // FCVTNU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXDr | fcvtnu x12, d8 // FCVTNU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1f16 | fcvtnu h24, h22 // FCVTNU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1i32 | fcvtnu s29, s22 // FCVTNU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv1i64 | fcvtnu d18, d15 // FCVTNU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv4f16 | fcvtnu v5.4h, v12.4h // FCVTNU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTNUv8f16 | fcvtnu v26.8h, v20.8h // FCVTNU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv2f32 | fcvtnu v15.2s, v1.2s // FCVTNU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTNUv4f32 | fcvtnu v7.4s, v16.4s // FCVTNU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTNUv2f64 | fcvtnu v13.2d, v8.2d // FCVTNU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWHr | fcvtps w27, h14 // FCVTPS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXHr | fcvtps x26, h20 // FCVTPS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWSr | fcvtps w5, s27 // FCVTPS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXSr | fcvtps x29, s6 // FCVTPS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWDr | fcvtps w23, d25 // FCVTPS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXDr | fcvtps x10, d16 // FCVTPS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1f16 | fcvtps h31, h22 // FCVTPS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1i32 | fcvtps s3, s3 // FCVTPS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv1i64 | fcvtps d10, d26 // FCVTPS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv4f16 | fcvtps v13.4h, v26.4h // FCVTPS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTPSv8f16 | fcvtps v26.8h, v10.8h // FCVTPS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv2f32 | fcvtps v18.2s, v8.2s // FCVTPS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPSv4f32 | fcvtps v12.4s, v18.4s // FCVTPS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPSv2f64 | fcvtps v3.2d, v2.2d // FCVTPS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWHr | fcvtpu w25, h22 // FCVTPU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXHr | fcvtpu x4, h24 // FCVTPU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWSr | fcvtpu w13, s0 // FCVTPU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXSr | fcvtpu x0, s17 // FCVTPU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWDr | fcvtpu w16, d25 // FCVTPU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXDr | fcvtpu x15, d12 // FCVTPU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1f16 | fcvtpu h1, h29 // FCVTPU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1i32 | fcvtpu s21, s30 // FCVTPU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv1i64 | fcvtpu d16, d26 // FCVTPU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv4f16 | fcvtpu v2.4h, v25.4h // FCVTPU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTPUv8f16 | fcvtpu v24.8h, v26.8h // FCVTPU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv2f32 | fcvtpu v6.2s, v23.2s // FCVTPU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTPUv4f32 | fcvtpu v10.4s, v6.4s // FCVTPU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTPUv2f64 | fcvtpu v7.2d, v23.2d // FCVTPU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv1i64 | fcvtxn s29, d4 // FCVTXN <Vb><d>, <Va><n> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv2f32 | fcvtxn v25.2s, v15.2d // FCVTXN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTXNv4f32 | fcvtxn2 v21.4s, v6.2d // FCVTXN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWHri | fcvtzs w28, h26, #26 // FCVTZS <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXHri | fcvtzs x22, h17, #58 // FCVTZS <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWSri | fcvtzs w17, s23, #22 // FCVTZS <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXSri | fcvtzs x15, s30, #2 // FCVTZS <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWDri | fcvtzs w13, d17, #17 // FCVTZS <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXDri | fcvtzs x14, d9, #24 // FCVTZS <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWHr | fcvtzs w15, h10 // FCVTZS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXHr | fcvtzs x4, h21 // FCVTZS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWSr | fcvtzs w1, s4 // FCVTZS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXSr | fcvtzs x27, s27 // FCVTZS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWDr | fcvtzs w24, d30 // FCVTZS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXDr | fcvtzs x18, d21 // FCVTZS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSh | fcvtzs h29, h23, #16 // FCVTZS H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSs | fcvtzs s23, s15, #2 // FCVTZS S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSd | fcvtzs d20, d26, #57 // FCVTZS D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4i16_shift | fcvtzs v20.4h, v24.4h, #11 // FCVTZS <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZSv8i16_shift | fcvtzs v18.8h, v10.8h, #7 // FCVTZS <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2i32_shift | fcvtzs v16.2s, v2.2s, #11 // FCVTZS <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4i32_shift | fcvtzs v22.4s, v18.4s, #5 // FCVTZS <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2i64_shift | fcvtzs v14.2d, v30.2d, #54 // FCVTZS <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1f16 | fcvtzs h16, h27 // FCVTZS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1i32 | fcvtzs s4, s5 // FCVTZS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv1i64 | fcvtzs d4, d23 // FCVTZS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4f16 | fcvtzs v8.4h, v16.4h // FCVTZS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZSv8f16 | fcvtzs v2.8h, v16.8h // FCVTZS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2f32 | fcvtzs v27.2s, v28.2s // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZSv4f32 | fcvtzs v29.4s, v18.4s // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZSv2f64 | fcvtzs v13.2d, v31.2d // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoH | fcvtzs z1.h, p2/m, z6.h // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoS | fcvtzs z19.s, p4/m, z16.h // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZS_ZPmZ_HtoD | fcvtzs z14.d, p0/m, z6.h // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZS_ZPmZ_StoS | fcvtzs z25.s, p5/m, z23.s // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZS_ZPmZ_StoD | fcvtzs z3.d, p1/m, z31.s // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_DtoS | fcvtzs z28.s, p5/m, z23.d // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZS_ZPmZ_DtoD | fcvtzs z22.d, p6/m, z29.d // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWHri | fcvtzu w12, h19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXHri | fcvtzu x17, h23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWSri | fcvtzu w16, s3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXSri | fcvtzu x27, s15, #8 // FCVTZU <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWDri | fcvtzu w21, d10, #23 // FCVTZU <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXDri | fcvtzu x26, d30, #27 // FCVTZU <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWHr | fcvtzu w26, h30 // FCVTZU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXHr | fcvtzu x9, h11 // FCVTZU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWSr | fcvtzu w20, s16 // FCVTZU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXSr | fcvtzu x7, s21 // FCVTZU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWDr | fcvtzu w25, d30 // FCVTZU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXDr | fcvtzu x13, d8 // FCVTZU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUh | fcvtzu h19, h8, #12 // FCVTZU H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUs | fcvtzu s25, s27, #10 // FCVTZU S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUd | fcvtzu d30, d16, #42 // FCVTZU D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4i16_shift | fcvtzu v19.4h, v26.4h, #9 // FCVTZU <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZUv8i16_shift | fcvtzu v27.8h, v6.8h, #11 // FCVTZU <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2i32_shift | fcvtzu v30.2s, v4.2s, #19 // FCVTZU <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4i32_shift | fcvtzu v31.4s, v6.4s, #22 // FCVTZU <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2i64_shift | fcvtzu v10.2d, v12.2d, #53 // FCVTZU <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1f16 | fcvtzu h25, h30 // FCVTZU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1i32 | fcvtzu s2, s19 // FCVTZU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv1i64 | fcvtzu d4, d7 // FCVTZU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4f16 | fcvtzu v3.4h, v2.4h // FCVTZU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FCVTZUv8f16 | fcvtzu v30.8h, v25.8h // FCVTZU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2f32 | fcvtzu v25.2s, v25.2s // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FCVTZUv4f32 | fcvtzu v21.4s, v2.4s // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FCVTZUv2f64 | fcvtzu v23.2d, v15.2d // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoH | fcvtzu z15.h, p0/m, z8.h // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoS | fcvtzu z8.s, p5/m, z18.h // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | FCVTZU_ZPmZ_HtoD | fcvtzu z11.d, p4/m, z24.h // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZU_ZPmZ_StoS | fcvtzu z13.s, p7/m, z8.s // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | FCVTZU_ZPmZ_StoD | fcvtzu z20.d, p2/m, z13.s // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_DtoS | fcvtzu z31.s, p3/m, z20.d // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZU_ZPmZ_DtoD | fcvtzu z4.d, p1/m, z25.d // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 7 | 7 | 1.00 | V1UnitV[2], V1UnitV02[2] | FDIVHrr | fdiv h1, h26, h23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.67 | V1UnitV[3], V1UnitV02[3] | FDIVSrr | fdiv s31, s18, s12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
-# CHECK-NEXT: 1 | 15 | 15 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVDrr | fdiv d6, d3, d0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
-# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FDIVv4f16 | fdiv v21.4h, v15.4h, v22.4h // FDIV <Vd>.4H, <Vn>.4H, <Vm>.4H \\ ASIMD FP divide, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
-# CHECK-NEXT: 1 | 13 | 13 | 0.15 | V1UnitV[13], V1UnitV02[13] | FDIVv8f16 | fdiv v31.8h, v12.8h, v15.8h // FDIV <Vd>.8H, <Vn>.8H, <Vm>.8H \\ ASIMD FP divide, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
-# CHECK-NEXT: 1 | 10 | 10 | 0.40 | V1UnitV[5], V1UnitV02[5] | FDIVv2f32 | fdiv v15.2s, v23.2s, v2.2s // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
-# CHECK-NEXT: 1 | 10 | 10 | 0.22 | V1UnitV[9], V1UnitV02[9] | FDIVv4f32 | fdiv v7.4s, v27.4s, v22.4s // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
-# CHECK-NEXT: 1 | 15 | 15 | 0.14 | V1UnitV[14], V1UnitV02[14] | FDIVv2f64 | fdiv v31.2d, v25.2d, v8.2d // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.14 V1UnitV02[14]
-# CHECK-NEXT: 1 | 13 | 13 | 0.08 | V1UnitV[12], V1UnitV0[12], V1UnitV01[12], V1UnitV02[12] | FDIV_ZPmZ_H | fdiv z21.h, p7/m, z21.h, z15.h // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
-# CHECK-NEXT: 1 | 10 | 10 | 0.11 | V1UnitV[9], V1UnitV0[9], V1UnitV01[9], V1UnitV02[9] | FDIV_ZPmZ_S | fdiv z17.s, p4/m, z17.s, z20.s // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
-# CHECK-NEXT: 1 | 15 | 15 | 0.07 | V1UnitV[14], V1UnitV0[14], V1UnitV01[14], V1UnitV02[14] | FDIV_ZPmZ_D | fdiv z13.d, p3/m, z13.d, z28.d // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
-# CHECK-NEXT: 1 | 13 | 13 | 0.08 | V1UnitV[12], V1UnitV0[12], V1UnitV01[12], V1UnitV02[12] | FDIVR_ZPmZ_H | fdivr z29.h, p4/m, z29.h, z1.h // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 1 13 13 0.08 V1UnitV0[12]
-# CHECK-NEXT: 1 | 10 | 10 | 0.11 | V1UnitV[9], V1UnitV0[9], V1UnitV01[9], V1UnitV02[9] | FDIVR_ZPmZ_S | fdivr z13.s, p0/m, z13.s, z29.s // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 1 10 10 0.11 V1UnitV0[9]
-# CHECK-NEXT: 1 | 15 | 15 | 0.07 | V1UnitV[14], V1UnitV0[14], V1UnitV01[14], V1UnitV02[14] | FDIVR_ZPmZ_D | fdivr z14.d, p3/m, z14.d, z31.d // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 1 15 15 0.07 V1UnitV0[14]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FDUP_ZI_S | fmov z19.s, #0.50000000 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FEXPA_ZZ_H | fexpa z6.h, z3.h // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAD_ZPmZZ_S | fmad z9.s, p5/m, z9.s, z7.s // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDHrrr | fmadd h27, h0, h6, h28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDSrrr | fmadd s13, s24, s15, s5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMADDDrrr | fmadd d19, d4, d2, d17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAX_ZPmI_D | fmax z25.d, p2/m, z25.d, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXHrr | fmax h8, h7, h11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXSrr | fmax s9, s21, s2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXDrr | fmax d4, d26, d26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXv4f32 | fmax v0.4s, v13.4s, v21.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXv4f32 | fmax v12.4s, v27.4s, v11.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAX_ZPmZ_S | fmax z16.s, p5/m, z16.s, z12.s // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAXNM_ZPmI_D | fmaxnm z25.d, p5/m, z25.d, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMHrr | fmaxnm h29, h13, h14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMSrr | fmaxnm s25, s20, s0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMDrr | fmaxnm d29, d25, d16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMv4f32 | fmaxnm v6.4s, v3.4s, v3.4s // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMv2f64 | fmaxnm v9.2d, v15.2d, v11.2d // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMAXNM_ZPmZ_S | fmaxnm z6.s, p5/m, z6.s, z17.s // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv2i16p | fmaxnmp h25, v19.2h // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv2i64p | fmaxnmp d17, v29.2d // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv4f32 | fmaxnmp v31.4s, v4.4s, v2.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXNMPv4f32 | fmaxnmp v23.4s, v15.4s, v1.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXNMVv4i16v | fmaxnmv h0, v13.4h // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMAXNMVv8i16v | fmaxnmv h12, v11.8h // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXNMVv4i32v | fmaxnmv s28, v31.4s // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMAXNMV_VPZ_H | fmaxnmv h9, p3, z2.h // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
-# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMAXNMV_VPZ_S | fmaxnmv s26, p6, z0.s // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
-# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMAXNMV_VPZ_D | fmaxnmv d7, p1, z29.d // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2i16p | fmaxp h15, v25.2h // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2i32p | fmaxp s6, v2.2s // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv2f32 | fmaxp v21.2s, v17.2s, v13.2s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMAXPv4f32 | fmaxp v10.4s, v5.4s, v25.4s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXVv4i16v | fmaxv h23, v4.4h // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMAXVv8i16v | fmaxv h25, v15.8h // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMAXVv4i32v | fmaxv s23, v2.4s // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMAXV_VPZ_H | fmaxv h12, p0, z22.h // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
-# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMAXV_VPZ_S | fmaxv s24, p5, z12.s // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
-# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMAXV_VPZ_D | fmaxv d1, p6, z25.d // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMIN_ZPmI_D | fmin z24.d, p4/m, z24.d, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINHrr | fmin h4, h13, h17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINSrr | fmin s1, s14, s22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINDrr | fmin d18, d19, d22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINv4f32 | fmin v6.4s, v25.4s, v27.4s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINv2f32 | fmin v12.2s, v30.2s, v25.2s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMIN_ZPmZ_H | fmin z11.h, p3/m, z11.h, z16.h // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMINNM_ZPmI_H | fminnm z19.h, p4/m, z19.h, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMHrr | fminnm h29, h23, h17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMSrr | fminnm s24, s14, s30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMDrr | fminnm d0, d26, d8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMv2f32 | fminnm v16.2s, v23.2s, v27.2s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMv4f32 | fminnm v23.4s, v19.4s, v22.4s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMINNM_ZPmZ_S | fminnm z24.s, p3/m, z24.s, z13.s // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2i16p | fminnmp h20, v14.2h // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2i64p | fminnmp d15, v8.2d // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv2f64 | fminnmp v27.2d, v27.2d, v16.2d // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINNMPv4f32 | fminnmp v2.4s, v14.4s, v14.4s // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINNMVv4i16v | fminnmv h19, v25.4h // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMINNMVv8i16v | fminnmv h23, v17.8h // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINNMVv4i32v | fminnmv s29, v17.4s // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMINNMV_VPZ_H | fminnmv h24, p3, z1.h // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
-# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMINNMV_VPZ_S | fminnmv s30, p3, z9.s // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
-# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMINNMV_VPZ_D | fminnmv d18, p5, z8.d // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2i16p | fminp h7, v10.2h // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2i32p | fminp s17, v7.2s // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv4f32 | fminp v25.4s, v2.4s, v15.4s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMINPv2f32 | fminp v14.2s, v28.2s, v15.2s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINVv4i16v | fminv h3, v30.4h // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitV[3] | FMINVv8i16v | fminv h29, v12.8h // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV[2] | FMINVv4i32v | fminv s16, v19.4s // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 13 | 13 | 0.33 | V1UnitV[6], V1UnitV01[6] | FMINV_VPZ_H | fminv h15, p2, z25.h // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 1 13 13 0.33 V1UnitV01[6]
-# CHECK-NEXT: 1 | 11 | 11 | 0.40 | V1UnitV[5], V1UnitV01[5] | FMINV_VPZ_S | fminv s4, p0, z6.s // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 1 11 11 0.4 V1UnitV01[5]
-# CHECK-NEXT: 1 | 9 | 9 | 0.50 | V1UnitV[4], V1UnitV01[4] | FMINV_VPZ_D | fminv d20, p1, z5.d // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 1 9 9 0.5 V1UnitV01[4]
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i16_indexed | fmla h23, h24, v15.h[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i32_indexed | fmla s9, s20, v28.s[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv1i64_indexed | fmla d12, d20, v7.d[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv8i16_indexed | fmla v29.8h, v15.8h, v10.h[4] // FMLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2i32_indexed | fmla v2.2s, v16.2s, v28.s[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv4i32_indexed | fmla v14.4s, v14.4s, v5.s[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2i64_indexed | fmla v10.2d, v14.2d, v21.d[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_H | fmla z2.h, z4.h, z7.h[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_S | fmla z22.s, z15.s, z1.s[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZZZI_D | fmla z1.d, z30.d, z11.d[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv4f32 | fmla v1.4s, v24.4s, v12.4s // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLAv2f64 | fmla v30.2d, v16.2d, v6.2d // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZPmZZ_S | fmla z6.s, p1/m, z24.s, z24.s // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i16_indexed | fmls h8, h14, v7.h[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i32_indexed | fmls s20, s17, v5.s[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv1i64_indexed | fmls d11, d24, v29.d[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv8i16_indexed | fmls v30.8h, v18.8h, v4.h[6] // FMLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2i32_indexed | fmls v10.2s, v27.2s, v0.s[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv4i32_indexed | fmls v27.4s, v7.4s, v24.s[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2i64_indexed | fmls v10.2d, v22.2d, v29.d[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_H | fmls z3.h, z31.h, z0.h[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_S | fmls z30.s, z8.s, z0.s[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZZZI_D | fmls z10.d, z20.d, z0.d[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv2f32 | fmls v6.2s, v3.2s, v12.2s // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMLSv8f16 | fmls v6.8h, v15.8h, v23.8h // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLS_ZPmZZ_S | fmls z26.s, p5/m, z28.s, z26.s // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHWr | fmov w15, h31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHXr | fmov x21, h14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABD16 | fabd h27, h20, h17 // FABD <Hd>, <Hn>, <Hm> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABD32 | fabd s16, s29, s6 // FABD <V><d>, <V><n>, <V><m> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABDv8f16 | fabd v13.8h, v28.8h, v12.8h // FABD <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABDv4f32 | fabd v12.4s, v4.4s, v31.4s // FABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FABD_ZPmZ_H | fabd z11.h, p6/m, z11.h, z5.h // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point absolute value/difference \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABSHr | fabs h25, h7 // FABS <Hd>, <Hn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABSSr | fabs s17, s12 // FABS <Sd>, <Sn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABSDr | fabs d30, d8 // FABS <Dd>, <Dn> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABSv4f32 | fabs v16.4s, v31.4s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FABSv2f32 | fabs v17.2s, v28.2s // FABS <Vd>.<T>, <Vn>.<T> \\ ASIMD FP absolute value/difference \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FABS_ZPmZ_S | fabs z26.s, p7/m, z24.s // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point absolute value/difference \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FACGE_PPzZZ_H | facge p0.h, p5/z, z15.h, z18.h // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FACGT_PPzZZ_S | facgt p7.s, p7/z, z10.s, z4.s // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGE16 | facge h24, h26, h29 // FACGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGE64 | facge d25, d24, d7 // FACGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGEv4f16 | facge v25.4h, v16.4h, v11.4h // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGEv2f32 | facge v19.2s, v24.2s, v5.2s // FACGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGT16 | facgt h0, h4, h10 // FACGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGT32 | facgt s29, s3, s2 // FACGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGTv8f16 | facgt v22.8h, v14.8h, v31.8h // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FACGTv4f32 | facgt v22.4s, v8.4s, v2.4s // FACGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FACGE_PPzZZ_H | facge p7.h, p5/z, z27.h, z22.h // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FACGT_PPzZZ_H | facgt p5.h, p5/z, z16.h, z31.h // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FADD_ZPmI_H | fadd z4.h, p7/m, z4.h, #1.0 // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDHrr | fadd h23, h27, h22 // FADD <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDSrr | fadd s1, s23, s27 // FADD <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDDrr | fadd d16, d15, d21 // FADD <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDv2f64 | fadd v7.2d, v30.2d, v20.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDv2f64 | fadd v16.2d, v13.2d, v11.2d // FADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FADD_ZPmZ_H | fadd z26.h, p4/m, z26.h, z1.h // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FADD_ZZZ_S | fadd z23.s, z7.s, z16.s // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 19 | 19 | 0.06 | V1UnitSVE0[36], V1UnitSVE01[36], V1UnitV[36], V1UnitV02[36] | FADDA_VPZ_H | fadda h8, p3, h8, z28.h // FADDA H<dn>, <Pg>, H<dn>, <Zm>.H \\ Floating point associative add, F16 \\ 2 19 19 0.06 V1UnitSVE0[36],V1UnitSVE0[36]
+# CHECK-NEXT: 2 | 11 | 11 | 0.10 | V1UnitSVE0[20], V1UnitSVE01[20], V1UnitV[20], V1UnitV02[20] | FADDA_VPZ_S | fadda s11, p6, s11, z1.s // FADDA S<dn>, <Pg>, S<dn>, <Zm>.S \\ Floating point associative add, F32 \\ 2 11 11 0.1 V1UnitSVE0[20],V1UnitSVE0[20]
+# CHECK-NEXT: 2 | 8 | 8 | 0.67 | V1UnitSVE01[6], V1UnitV[6] | FADDA_VPZ_D | fadda d27, p4, d27, z27.d // FADDA D<dn>, <Pg>, D<dn>, <Zm>.D \\ Floating point associative add, F64 \\ 2 8 8 0.67 V1UnitSVE01[6],V1UnitSVE01[6]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDPv2i16p | faddp h10, v19.2h // FADDP <Vh><d>, <Vn>.<Th> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDPv2i64p | faddp d11, v28.2d // FADDP <V><d>, <Vn>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDPv2f64 | faddp v16.2d, v11.2d, v5.2d // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FADDPv4f32 | faddp v16.4s, v11.4s, v18.4s // FADDP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 13 | 13 | 0.33 | V1UnitSVE01[12], V1UnitV[12] | FADDV_VPZ_H | faddv h21, p2, z3.h // FADDV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.40 | V1UnitSVE01[10], V1UnitV[10] | FADDV_VPZ_S | faddv s16, p2, z25.s // FADDV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | FADDV_VPZ_D | faddv d18, p4, z7.d // FADDV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FCADD_ZPmZ_H | fcadd z29.h, p2/m, z29.h, z15.h, #270 // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> \\ Floating point complex add \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPHrr | fccmp h31, h3, #11, hs // FCCMP <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPSrr | fccmp s5, s6, #0, lo // FCCMP <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPDrr | fccmp d17, d15, #0, ne // FCCMP <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEHrr | fccmpe h6, h1, #12, ne // FCCMPE <Hn>, <Hm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPESrr | fccmpe s16, s13, #10, vs // FCCMPE <Sn>, <Sm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCCMPEDrr | fccmpe d17, d14, #15, ls // FCCMPE <Dn>, <Dm>, #<nzcv>, <cond> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMEQ_PPzZZ_D | fcmeq p7.d, p1/z, z23.d, z21.d // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMGE_PPzZZ_H | fcmge p6.h, p1/z, z19.h, z10.h // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMGT_PPzZZ_S | fcmgt p5.s, p2/z, z29.s, z5.s // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMNE_PPzZZ_D | fcmne p5.d, p0/z, z22.d, z15.d // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMUO_PPzZZ_D | fcmuo p0.d, p2/z, z15.d, z23.d // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMEQ_PPzZ0_D | fcmeq p4.d, p5/z, z19.d, #0.0 // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMGE_PPzZ0_D | fcmge p0.d, p5/z, z10.d, #0.0 // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMGT_PPzZ0_D | fcmgt p6.d, p1/z, z8.d, #0.0 // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMLE_PPzZ0_D | fcmle p2.d, p4/z, z26.d, #0.0 // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMLT_PPzZ0_D | fcmlt p5.d, p5/z, z23.d, #0.0 // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMNE_PPzZ0_H | fcmne p2.h, p3/z, z7.h, #0.0 // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQ16 | fcmeq h30, h6, h1 // FCMEQ <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQ32 | fcmeq s17, s0, s21 // FCMEQ <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQv2f32 | fcmeq v19.2s, v31.2s, v19.2s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQv4f32 | fcmeq v12.4s, v11.4s, v26.4s // FCMEQ <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQv1i16rz | fcmeq h19, h23, #0.0 // FCMEQ <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQv1i32rz | fcmeq s25, s18, #0.0 // FCMEQ <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQv2i32rz | fcmeq v8.2s, v16.2s, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMEQv2i64rz | fcmeq v18.2d, v17.2d, #0.0 // FCMEQ <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGE16 | fcmge h1, h16, h12 // FCMGE <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGE64 | fcmge d29, d9, d3 // FCMGE <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGEv8f16 | fcmge v20.8h, v19.8h, v22.8h // FCMGE <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGEv2f64 | fcmge v17.2d, v11.2d, v13.2d // FCMGE <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGEv1i16rz | fcmge h10, h23, #0.0 // FCMGE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGEv1i64rz | fcmge d5, d17, #0.0 // FCMGE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGEv4i16rz | fcmge v18.4h, v27.4h, #0.0 // FCMGE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGEv2i32rz | fcmge v17.2s, v11.2s, #0.0 // FCMGE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGT16 | fcmgt h4, h5, h0 // FCMGT <Hd>, <Hn>, <Hm> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGT32 | fcmgt s13, s20, s3 // FCMGT <V><d>, <V><n>, <V><m> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGTv8f16 | fcmgt v24.8h, v24.8h, v28.8h // FCMGT <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGTv4f32 | fcmgt v19.4s, v20.4s, v13.4s // FCMGT <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGTv1i16rz | fcmgt h0, h18, #0.0 // FCMGT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGTv1i64rz | fcmgt d30, d23, #0.0 // FCMGT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGTv8i16rz | fcmgt v0.8h, v11.8h, #0.0 // FCMGT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMGTv2i64rz | fcmgt v19.2d, v31.2d, #0.0 // FCMGT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FCMLA_ZZZI_H | fcmla z20.h, z12.h, z4.h[1], #90 // FCMLA <Zda>.H, <Zn>.H, <Zmh>.H[<immh>], <const> \\ Floating point complex multiply add \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FCMLA_ZZZI_S | fcmla z1.s, z27.s, z6.s[0], #90 // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const> \\ Floating point complex multiply add \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 5 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FCMLA_ZPmZZ_S | fcmla z25.s, p3/m, z13.s, z23.s, #180 // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> \\ Floating point complex multiply add \\ 2 5 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMGE_PPzZZ_S | fcmge p5.s, p3/z, z12.s, z28.s // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLEv1i16rz | fcmle h18, h28, #0.0 // FCMLE <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLEv1i64rz | fcmle d18, d16, #0.0 // FCMLE <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLEv8i16rz | fcmle v16.8h, v11.8h, #0.0 // FCMLE <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLEv4i32rz | fcmle v22.4s, v30.4s, #0.0 // FCMLE <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCMGT_PPzZZ_S | fcmgt p1.s, p1/z, z24.s, z13.s // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> \\ Floating point compare \\ 2 2 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLTv1i16rz | fcmlt h23, h7, #0.0 // FCMLT <Hd>, <Hn>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLTv1i64rz | fcmlt d22, d28, #0.0 // FCMLT <V><d>, <V><n>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLTv4i16rz | fcmlt v8.4h, v2.4h, #0.0 // FCMLT <Vd>.<Th>, <Vn>.<Th>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FCMLTv2i64rz | fcmlt v7.2d, v16.2d, #0.0 // FCMLT <Vd>.<T>, <Vn>.<T>, #0.0 \\ ASIMD FP compare \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHrr | fcmp h5, h21 // FCMP <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPHri | fcmp h5, #0.0 // FCMP <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSrr | fcmp s7, s0 // FCMP <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPSri | fcmp s28, #0.0 // FCMP <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDrr | fcmp d1, d27 // FCMP <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPDri | fcmp d16, #0.0 // FCMP <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHrr | fcmpe h22, h21 // FCMPE <Hn>, <Hm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEHri | fcmpe h13, #0.0 // FCMPE <Hn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESrr | fcmpe s11, s29 // FCMPE <Sn>, <Sm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPESri | fcmpe s15, #0.0 // FCMPE <Sn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDrr | fcmpe d27, d22 // FCMPE <Dn>, <Dm> \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCMPEDri | fcmpe d9, #0.0 // FCMPE <Dn>, #0.0 \\ FP compare \\ 1 2 2 1.0 V1UnitV0
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FCPY_ZPmI_H | fmov z2.h, p7/m, #0.50000000 // FCPY <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | FCSELHrrr | fcsel h26, h2, h11, hs // FCSEL <Hd>, <Hn>, <Hm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | FCSELSrrr | fcsel s5, s1, s4, vc // FCSEL <Sd>, <Sn>, <Sm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | FCSELDrrr | fcsel d14, d0, d19, eq // FCSEL <Dd>, <Dn>, <Dm>, <cond> \\ FP select \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTSHr | fcvt s13, h13 // FCVT <Sd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTDHr | fcvt d10, h6 // FCVT <Dd>, <Hn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTHSr | fcvt h1, s1 // FCVT <Hd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTDSr | fcvt d9, s23 // FCVT <Dd>, <Sn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTHDr | fcvt h17, d16 // FCVT <Hd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTSDr | fcvt s31, d27 // FCVT <Sd>, <Dn> \\ FP convert, from vec to vec reg \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVT_ZPmZ_HtoS | fcvt z0.s, p1/m, z4.h // FCVT <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVT_ZPmZ_HtoD | fcvt z6.d, p0/m, z17.h // FCVT <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVT_ZPmZ_StoH | fcvt z7.h, p7/m, z5.s // FCVT <Zd>.H, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F32 or F32 to F16) \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVT_ZPmZ_StoD | fcvt z11.d, p2/m, z18.s // FCVT <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVT_ZPmZ_DtoH | fcvt z26.h, p0/m, z30.d // FCVT <Zd>.H, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVT_ZPmZ_DtoS | fcvt z13.s, p2/m, z3.d // FCVT <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWHr | fcvtas w23, h3 // FCVTAS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXHr | fcvtas x14, h29 // FCVTAS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWSr | fcvtas w0, s13 // FCVTAS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXSr | fcvtas x23, s15 // FCVTAS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUWDr | fcvtas w1, d31 // FCVTAS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTASUXDr | fcvtas x2, d3 // FCVTAS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTASv1f16 | fcvtas h27, h24 // FCVTAS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTASv1i32 | fcvtas s16, s0 // FCVTAS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTASv1i64 | fcvtas d14, d7 // FCVTAS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTASv4f16 | fcvtas v5.4h, v16.4h // FCVTAS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTASv8f16 | fcvtas v13.8h, v30.8h // FCVTAS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTASv2f32 | fcvtas v12.2s, v1.2s // FCVTAS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTASv4f32 | fcvtas v9.4s, v31.4s // FCVTAS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTASv2f64 | fcvtas v2.2d, v22.2d // FCVTAS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWHr | fcvtau w13, h27 // FCVTAU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXHr | fcvtau x8, h12 // FCVTAU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWSr | fcvtau w20, s10 // FCVTAU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXSr | fcvtau x27, s22 // FCVTAU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUWDr | fcvtau w6, d26 // FCVTAU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTAUUXDr | fcvtau x16, d13 // FCVTAU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTAUv1f16 | fcvtau h6, h29 // FCVTAU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTAUv1i32 | fcvtau s23, s7 // FCVTAU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTAUv1i64 | fcvtau d1, d26 // FCVTAU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTAUv4f16 | fcvtau v12.4h, v13.4h // FCVTAU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTAUv8f16 | fcvtau v21.8h, v0.8h // FCVTAU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTAUv2f32 | fcvtau v31.2s, v6.2s // FCVTAU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTAUv4f32 | fcvtau v29.4s, v26.4s // FCVTAU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTAUv2f64 | fcvtau v9.2d, v7.2d // FCVTAU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTLv4i16 | fcvtl v30.4s, v4.4h // FCVTL <Vd>.4S, <Vn>.4H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTLv2i32 | fcvtl v28.2d, v13.2s // FCVTL <Vd>.2D, <Vn>.2S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTLv8i16 | fcvtl2 v14.4s, v29.8h // FCVTL2 <Vd>.4S, <Vn>.8H \\ ASIMD FP convert, long (F16 to F32) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTLv4i32 | fcvtl2 v0.2d, v9.4s // FCVTL2 <Vd>.2D, <Vn>.4S \\ ASIMD FP convert, long (F32 to F64) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWHr | fcvtms w15, h1 // FCVTMS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXHr | fcvtms x5, h2 // FCVTMS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWSr | fcvtms w1, s16 // FCVTMS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXSr | fcvtms x27, s22 // FCVTMS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUWDr | fcvtms w18, d21 // FCVTMS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMSUXDr | fcvtms x6, d26 // FCVTMS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMSv1f16 | fcvtms h19, h29 // FCVTMS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMSv1i32 | fcvtms s30, s14 // FCVTMS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMSv1i64 | fcvtms d8, d20 // FCVTMS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTMSv4f16 | fcvtms v27.4h, v7.4h // FCVTMS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTMSv8f16 | fcvtms v26.8h, v11.8h // FCVTMS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMSv2f32 | fcvtms v13.2s, v2.2s // FCVTMS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTMSv4f32 | fcvtms v18.4s, v21.4s // FCVTMS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMSv2f64 | fcvtms v15.2d, v16.2d // FCVTMS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWHr | fcvtmu w20, h6 // FCVTMU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXHr | fcvtmu x7, h18 // FCVTMU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWSr | fcvtmu w24, s19 // FCVTMU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXSr | fcvtmu x7, s15 // FCVTMU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUWDr | fcvtmu w16, d16 // FCVTMU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTMUUXDr | fcvtmu x1, d18 // FCVTMU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMUv1f16 | fcvtmu h20, h13 // FCVTMU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMUv1i32 | fcvtmu s28, s25 // FCVTMU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMUv1i64 | fcvtmu d3, d27 // FCVTMU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTMUv4f16 | fcvtmu v18.4h, v2.4h // FCVTMU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTMUv8f16 | fcvtmu v10.8h, v11.8h // FCVTMU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMUv2f32 | fcvtmu v27.2s, v14.2s // FCVTMU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTMUv4f32 | fcvtmu v31.4s, v4.4s // FCVTMU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTMUv2f64 | fcvtmu v6.2d, v26.2d // FCVTMU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTNv4i16 | fcvtn v4.4h, v22.4s // FCVTN <Vd>.4H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNv2i32 | fcvtn v14.2s, v2.2d // FCVTN <Vd>.2S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTNv8i16 | fcvtn2 v0.8h, v30.4s // FCVTN2 <Vd>.8H, <Vn>.4S \\ ASIMD FP convert, narrow (F32 to F16) \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNv4i32 | fcvtn2 v21.4s, v13.2d // FCVTN2 <Vd>.4S, <Vn>.2D \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWHr | fcvtns w19, h15 // FCVTNS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXHr | fcvtns x20, h0 // FCVTNS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWSr | fcvtns w10, s5 // FCVTNS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXSr | fcvtns x14, s12 // FCVTNS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUWDr | fcvtns w30, d2 // FCVTNS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNSUXDr | fcvtns x0, d12 // FCVTNS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNSv1f16 | fcvtns h16, h25 // FCVTNS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNSv1i32 | fcvtns s23, s19 // FCVTNS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNSv1i64 | fcvtns d30, d1 // FCVTNS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTNSv4f16 | fcvtns v28.4h, v19.4h // FCVTNS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTNSv8f16 | fcvtns v19.8h, v19.8h // FCVTNS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNSv2f32 | fcvtns v20.2s, v4.2s // FCVTNS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTNSv4f32 | fcvtns v28.4s, v29.4s // FCVTNS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNSv2f64 | fcvtns v21.2d, v31.2d // FCVTNS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWHr | fcvtnu w12, h3 // FCVTNU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXHr | fcvtnu x23, h27 // FCVTNU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWSr | fcvtnu w4, s23 // FCVTNU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXSr | fcvtnu x5, s28 // FCVTNU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUWDr | fcvtnu w4, d11 // FCVTNU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTNUUXDr | fcvtnu x12, d8 // FCVTNU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNUv1f16 | fcvtnu h24, h22 // FCVTNU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNUv1i32 | fcvtnu s29, s22 // FCVTNU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNUv1i64 | fcvtnu d18, d15 // FCVTNU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTNUv4f16 | fcvtnu v5.4h, v12.4h // FCVTNU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTNUv8f16 | fcvtnu v26.8h, v20.8h // FCVTNU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNUv2f32 | fcvtnu v15.2s, v1.2s // FCVTNU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTNUv4f32 | fcvtnu v7.4s, v16.4s // FCVTNU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTNUv2f64 | fcvtnu v13.2d, v8.2d // FCVTNU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWHr | fcvtps w27, h14 // FCVTPS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXHr | fcvtps x26, h20 // FCVTPS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWSr | fcvtps w5, s27 // FCVTPS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXSr | fcvtps x29, s6 // FCVTPS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUWDr | fcvtps w23, d25 // FCVTPS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPSUXDr | fcvtps x10, d16 // FCVTPS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPSv1f16 | fcvtps h31, h22 // FCVTPS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPSv1i32 | fcvtps s3, s3 // FCVTPS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPSv1i64 | fcvtps d10, d26 // FCVTPS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTPSv4f16 | fcvtps v13.4h, v26.4h // FCVTPS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTPSv8f16 | fcvtps v26.8h, v10.8h // FCVTPS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPSv2f32 | fcvtps v18.2s, v8.2s // FCVTPS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTPSv4f32 | fcvtps v12.4s, v18.4s // FCVTPS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPSv2f64 | fcvtps v3.2d, v2.2d // FCVTPS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWHr | fcvtpu w25, h22 // FCVTPU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXHr | fcvtpu x4, h24 // FCVTPU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWSr | fcvtpu w13, s0 // FCVTPU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXSr | fcvtpu x0, s17 // FCVTPU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUWDr | fcvtpu w16, d25 // FCVTPU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTPUUXDr | fcvtpu x15, d12 // FCVTPU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPUv1f16 | fcvtpu h1, h29 // FCVTPU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPUv1i32 | fcvtpu s21, s30 // FCVTPU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPUv1i64 | fcvtpu d16, d26 // FCVTPU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTPUv4f16 | fcvtpu v2.4h, v25.4h // FCVTPU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTPUv8f16 | fcvtpu v24.8h, v26.8h // FCVTPU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPUv2f32 | fcvtpu v6.2s, v23.2s // FCVTPU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTPUv4f32 | fcvtpu v10.4s, v6.4s // FCVTPU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTPUv2f64 | fcvtpu v7.2d, v23.2d // FCVTPU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTXNv1i64 | fcvtxn s29, d4 // FCVTXN <Vb><d>, <Va><n> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTXNv2f32 | fcvtxn v25.2s, v15.2d // FCVTXN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTXNv4f32 | fcvtxn2 v21.4s, v6.2d // FCVTXN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD FP convert, narrow (F64 to F32) \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWHri | fcvtzs w28, h26, #26 // FCVTZS <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXHri | fcvtzs x22, h17, #58 // FCVTZS <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWSri | fcvtzs w17, s23, #22 // FCVTZS <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXSri | fcvtzs x15, s30, #2 // FCVTZS <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSWDri | fcvtzs w13, d17, #17 // FCVTZS <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSSXDri | fcvtzs x14, d9, #24 // FCVTZS <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWHr | fcvtzs w15, h10 // FCVTZS <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXHr | fcvtzs x4, h21 // FCVTZS <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWSr | fcvtzs w1, s4 // FCVTZS <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXSr | fcvtzs x27, s27 // FCVTZS <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUWDr | fcvtzs w24, d30 // FCVTZS <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZSUXDr | fcvtzs x18, d21 // FCVTZS <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSh | fcvtzs h29, h23, #16 // FCVTZS H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSs | fcvtzs s23, s15, #2 // FCVTZS S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSd | fcvtzs d20, d26, #57 // FCVTZS D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZSv4i16_shift | fcvtzs v20.4h, v24.4h, #11 // FCVTZS <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZSv8i16_shift | fcvtzs v18.8h, v10.8h, #7 // FCVTZS <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv2i32_shift | fcvtzs v16.2s, v2.2s, #11 // FCVTZS <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZSv4i32_shift | fcvtzs v22.4s, v18.4s, #5 // FCVTZS <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv2i64_shift | fcvtzs v14.2d, v30.2d, #54 // FCVTZS <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv1f16 | fcvtzs h16, h27 // FCVTZS <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv1i32 | fcvtzs s4, s5 // FCVTZS S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv1i64 | fcvtzs d4, d23 // FCVTZS D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZSv4f16 | fcvtzs v8.4h, v16.4h // FCVTZS <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZSv8f16 | fcvtzs v2.8h, v16.8h // FCVTZS <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv2f32 | fcvtzs v27.2s, v28.2s // FCVTZS <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZSv4f32 | fcvtzs v29.4s, v18.4s // FCVTZS <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZSv2f64 | fcvtzs v13.2d, v31.2d // FCVTZS <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FCVTZS_ZPmZ_HtoH | fcvtzs z1.h, p2/m, z6.h // FCVTZS <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FCVTZS_ZPmZ_HtoS | fcvtzs z19.s, p4/m, z16.h // FCVTZS <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FCVTZS_ZPmZ_HtoD | fcvtzs z14.d, p0/m, z6.h // FCVTZS <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZS_ZPmZ_StoS | fcvtzs z25.s, p5/m, z23.s // FCVTZS <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZS_ZPmZ_StoD | fcvtzs z3.d, p1/m, z31.s // FCVTZS <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZS_ZPmZ_DtoS | fcvtzs z28.s, p5/m, z23.d // FCVTZS <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZS_ZPmZ_DtoD | fcvtzs z22.d, p6/m, z29.d // FCVTZS <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWHri | fcvtzu w12, h19, #20 // FCVTZU <Wd>, <Hn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXHri | fcvtzu x17, h23, #12 // FCVTZU <Xd>, <Hn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWSri | fcvtzu w16, s3, #12 // FCVTZU <Wd>, <Sn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXSri | fcvtzu x27, s15, #8 // FCVTZU <Xd>, <Sn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSWDri | fcvtzu w21, d10, #23 // FCVTZU <Wd>, <Dn>, #<sfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUSXDri | fcvtzu x26, d30, #27 // FCVTZU <Xd>, <Dn>, #<dfbits> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWHr | fcvtzu w26, h30 // FCVTZU <Wd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXHr | fcvtzu x9, h11 // FCVTZU <Xd>, <Hn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWSr | fcvtzu w20, s16 // FCVTZU <Wd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXSr | fcvtzu x7, s21 // FCVTZU <Xd>, <Sn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUWDr | fcvtzu w25, d30 // FCVTZU <Wd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FCVTZUUXDr | fcvtzu x13, d8 // FCVTZU <Xd>, <Dn> \\ FP convert, from vec to gen reg \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUh | fcvtzu h19, h8, #12 // FCVTZU H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUs | fcvtzu s25, s27, #10 // FCVTZU S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUd | fcvtzu d30, d16, #42 // FCVTZU D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZUv4i16_shift | fcvtzu v19.4h, v26.4h, #9 // FCVTZU <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZUv8i16_shift | fcvtzu v27.8h, v6.8h, #11 // FCVTZU <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv2i32_shift | fcvtzu v30.2s, v4.2s, #19 // FCVTZU <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZUv4i32_shift | fcvtzu v31.4s, v6.4s, #22 // FCVTZU <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv2i64_shift | fcvtzu v10.2d, v12.2d, #53 // FCVTZU <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv1f16 | fcvtzu h25, h30 // FCVTZU <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv1i32 | fcvtzu s2, s19 // FCVTZU S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv1i64 | fcvtzu d4, d7 // FCVTZU D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZUv4f16 | fcvtzu v3.4h, v2.4h // FCVTZU <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZUv8f16 | fcvtzu v30.8h, v25.8h // FCVTZU <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv2f32 | fcvtzu v25.2s, v25.2s // FCVTZU <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZUv4f32 | fcvtzu v21.4s, v2.4s // FCVTZU <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FCVTZUv2f64 | fcvtzu v23.2d, v15.2d // FCVTZU <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FCVTZU_ZPmZ_HtoH | fcvtzu z15.h, p0/m, z8.h // FCVTZU <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FCVTZU_ZPmZ_HtoS | fcvtzu z8.s, p5/m, z18.h // FCVTZU <Zd>.S, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FCVTZU_ZPmZ_HtoD | fcvtzu z11.d, p4/m, z24.h // FCVTZU <Zd>.D, <Pg>/M, <Zn>.H \\ Floating point convert to integer, F16 \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZU_ZPmZ_StoS | fcvtzu z13.s, p7/m, z8.s // FCVTZU <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FCVTZU_ZPmZ_StoD | fcvtzu z20.d, p2/m, z13.s // FCVTZU <Zd>.D, <Pg>/M, <Zn>.S \\ Floating point convert to integer, F32 \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZU_ZPmZ_DtoS | fcvtzu z31.s, p3/m, z20.d // FCVTZU <Zd>.S, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FCVTZU_ZPmZ_DtoD | fcvtzu z4.d, p1/m, z25.d // FCVTZU <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point convert to integer, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 7 | 7 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FDIVHrr | fdiv h1, h26, h23 // FDIV <Hd>, <Hn>, <Hm> \\ FP divide, H-form \\ 1 7 7 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 10 | 10 | 0.67 | V1UnitSVE0[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV02[3] | FDIVSrr | fdiv s31, s18, s12 // FDIV <Sd>, <Sn>, <Sm> \\ FP divide, S-form \\ 1 10 10 0.67 V1UnitV02[3]
+# CHECK-NEXT: 1 | 15 | 15 | 0.29 | V1UnitSVE0[7], V1UnitSVE01[7], V1UnitV[7], V1UnitV02[7] | FDIVDrr | fdiv d6, d3, d0 // FDIV <Dd>, <Dn>, <Dm> \\ FP divide, D-form \\ 1 15 15 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitSVE0[7], V1UnitSVE01[7], V1UnitV[7], V1UnitV02[7] | FDIVv4f16 | fdiv v21.4h, v15.4h, v22.4h // FDIV <Vd>.4H, <Vn>.4H, <Vm>.4H \\ ASIMD FP divide, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 13 | 13 | 0.15 | V1UnitSVE0[13], V1UnitSVE01[13], V1UnitV[13], V1UnitV02[13] | FDIVv8f16 | fdiv v31.8h, v12.8h, v15.8h // FDIV <Vd>.8H, <Vn>.8H, <Vm>.8H \\ ASIMD FP divide, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.40 | V1UnitSVE0[5], V1UnitSVE01[5], V1UnitV[5], V1UnitV02[5] | FDIVv2f32 | fdiv v15.2s, v23.2s, v2.2s // FDIV <Vd>.2S, <Vn>.2S, <Vm>.2S \\ ASIMD FP divide, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+# CHECK-NEXT: 1 | 10 | 10 | 0.22 | V1UnitSVE0[9], V1UnitSVE01[9], V1UnitV[9], V1UnitV02[9] | FDIVv4f32 | fdiv v7.4s, v27.4s, v22.4s // FDIV <Vd>.4S, <Vn>.4S, <Vm>.4S \\ ASIMD FP divide, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
+# CHECK-NEXT: 1 | 15 | 15 | 0.14 | V1UnitSVE0[14], V1UnitSVE01[14], V1UnitV[14], V1UnitV02[14] | FDIVv2f64 | fdiv v31.2d, v25.2d, v8.2d // FDIV <Vd>.2D, <Vn>.2D, <Vm>.2D \\ ASIMD FP divide, Q-form, F64 \\ 1 15 15 0.14 V1UnitV02[14]
+# CHECK-NEXT: 2 | 13 | 13 | 0.08 | V1UnitSVE0[24], V1UnitSVE01[24], V1UnitV[24], V1UnitV02[24] | FDIV_ZPmZ_H | fdiv z21.h, p7/m, z21.h, z15.h // FDIV <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 2 13 13 0.08 V1UnitSVE0[24],V1UnitSVE0[24]
+# CHECK-NEXT: 2 | 10 | 10 | 0.11 | V1UnitSVE0[18], V1UnitSVE01[18], V1UnitV[18], V1UnitV02[18] | FDIV_ZPmZ_S | fdiv z17.s, p4/m, z17.s, z20.s // FDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 2 10 10 0.11 V1UnitSVE0[18],V1UnitSVE0[18]
+# CHECK-NEXT: 2 | 15 | 15 | 0.07 | V1UnitSVE0[28], V1UnitSVE01[28], V1UnitV[28], V1UnitV02[28] | FDIV_ZPmZ_D | fdiv z13.d, p3/m, z13.d, z28.d // FDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 2 15 15 0.07 V1UnitSVE0[28],V1UnitSVE0[28]
+# CHECK-NEXT: 2 | 13 | 13 | 0.08 | V1UnitSVE0[24], V1UnitSVE01[24], V1UnitV[24], V1UnitV02[24] | FDIVR_ZPmZ_H | fdivr z29.h, p4/m, z29.h, z1.h // FDIVR <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Floating point divide, F16 \\ 2 13 13 0.08 V1UnitSVE0[24],V1UnitSVE0[24]
+# CHECK-NEXT: 2 | 10 | 10 | 0.11 | V1UnitSVE0[18], V1UnitSVE01[18], V1UnitV[18], V1UnitV02[18] | FDIVR_ZPmZ_S | fdivr z13.s, p0/m, z13.s, z29.s // FDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Floating point divide, F32 \\ 2 10 10 0.11 V1UnitSVE0[18],V1UnitSVE0[18]
+# CHECK-NEXT: 2 | 15 | 15 | 0.07 | V1UnitSVE0[28], V1UnitSVE01[28], V1UnitV[28], V1UnitV02[28] | FDIVR_ZPmZ_D | fdivr z14.d, p3/m, z14.d, z31.d // FDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Floating point divide, F64 \\ 2 15 15 0.07 V1UnitSVE0[28],V1UnitSVE0[28]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FDUP_ZI_S | fmov z19.s, #0.50000000 // FDUP <Zd>.<T>, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FEXPA_ZZ_H | fexpa z6.h, z3.h // FEXPA <Zd>.<T>, <Zn>.<T> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAD_ZPmZZ_S | fmad z9.s, p5/m, z9.s, z7.s // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMADDHrrr | fmadd h27, h0, h6, h28 // FMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMADDSrrr | fmadd s13, s24, s15, s5 // FMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMADDDrrr | fmadd d19, d4, d2, d17 // FMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAX_ZPmI_D | fmax z25.d, p2/m, z25.d, #0.0 // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXHrr | fmax h8, h7, h11 // FMAX <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXSrr | fmax s9, s21, s2 // FMAX <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXDrr | fmax d4, d26, d26 // FMAX <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXv4f32 | fmax v0.4s, v13.4s, v21.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXv4f32 | fmax v12.4s, v27.4s, v11.4s // FMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAX_ZPmZ_S | fmax z16.s, p5/m, z16.s, z12.s // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAXNM_ZPmI_D | fmaxnm z25.d, p5/m, z25.d, #1.0 // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMHrr | fmaxnm h29, h13, h14 // FMAXNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMSrr | fmaxnm s25, s20, s0 // FMAXNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMDrr | fmaxnm d29, d25, d16 // FMAXNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMv4f32 | fmaxnm v6.4s, v3.4s, v3.4s // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMv2f64 | fmaxnm v9.2d, v15.2d, v11.2d // FMAXNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAXNM_ZPmZ_S | fmaxnm z6.s, p5/m, z6.s, z17.s // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMPv2i16p | fmaxnmp h25, v19.2h // FMAXNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMPv2i64p | fmaxnmp d17, v29.2d // FMAXNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMPv4f32 | fmaxnmp v31.4s, v4.4s, v2.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXNMPv4f32 | fmaxnmp v23.4s, v15.4s, v1.4s // FMAXNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAXNMVv4i16v | fmaxnmv h0, v13.4h // FMAXNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitSVE01[3], V1UnitV[3] | FMAXNMVv8i16v | fmaxnmv h12, v11.8h // FMAXNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAXNMVv4i32v | fmaxnmv s28, v31.4s // FMAXNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 2 | 13 | 13 | 0.33 | V1UnitSVE01[12], V1UnitV[12] | FMAXNMV_VPZ_H | fmaxnmv h9, p3, z2.h // FMAXNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.40 | V1UnitSVE01[10], V1UnitV[10] | FMAXNMV_VPZ_S | fmaxnmv s26, p6, z0.s // FMAXNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | FMAXNMV_VPZ_D | fmaxnmv d7, p1, z29.d // FMAXNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXPv2i16p | fmaxp h15, v25.2h // FMAXP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXPv2i32p | fmaxp s6, v2.2s // FMAXP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXPv2f32 | fmaxp v21.2s, v17.2s, v13.2s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMAXPv4f32 | fmaxp v10.4s, v5.4s, v25.4s // FMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAXVv4i16v | fmaxv h23, v4.4h // FMAXV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitSVE01[3], V1UnitV[3] | FMAXVv8i16v | fmaxv h25, v15.8h // FMAXV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMAXVv4i32v | fmaxv s23, v2.4s // FMAXV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 2 | 13 | 13 | 0.33 | V1UnitSVE01[12], V1UnitV[12] | FMAXV_VPZ_H | fmaxv h12, p0, z22.h // FMAXV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.40 | V1UnitSVE01[10], V1UnitV[10] | FMAXV_VPZ_S | fmaxv s24, p5, z12.s // FMAXV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | FMAXV_VPZ_D | fmaxv d1, p6, z25.d // FMAXV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMIN_ZPmI_D | fmin z24.d, p4/m, z24.d, #0.0 // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINHrr | fmin h4, h13, h17 // FMIN <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINSrr | fmin s1, s14, s22 // FMIN <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINDrr | fmin d18, d19, d22 // FMIN <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINv4f32 | fmin v6.4s, v25.4s, v27.4s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINv2f32 | fmin v12.2s, v30.2s, v25.2s // FMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMIN_ZPmZ_H | fmin z11.h, p3/m, z11.h, z16.h // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMINNM_ZPmI_H | fminnm z19.h, p4/m, z19.h, #0.0 // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMHrr | fminnm h29, h23, h17 // FMINNM <Hd>, <Hn>, <Hm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMSrr | fminnm s24, s14, s30 // FMINNM <Sd>, <Sn>, <Sm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMDrr | fminnm d0, d26, d8 // FMINNM <Dd>, <Dn>, <Dm> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMv2f32 | fminnm v16.2s, v23.2s, v27.2s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMv4f32 | fminnm v23.4s, v19.4s, v22.4s // FMINNM <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMINNM_ZPmZ_S | fminnm z24.s, p3/m, z24.s, z13.s // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point min/max \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMPv2i16p | fminnmp h20, v14.2h // FMINNMP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMPv2i64p | fminnmp d15, v8.2d // FMINNMP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMPv2f64 | fminnmp v27.2d, v27.2d, v16.2d // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINNMPv4f32 | fminnmp v2.4s, v14.4s, v14.4s // FMINNMP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMINNMVv4i16v | fminnmv h19, v25.4h // FMINNMV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitSVE01[3], V1UnitV[3] | FMINNMVv8i16v | fminnmv h23, v17.8h // FMINNMV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMINNMVv4i32v | fminnmv s29, v17.4s // FMINNMV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 2 | 13 | 13 | 0.33 | V1UnitSVE01[12], V1UnitV[12] | FMINNMV_VPZ_H | fminnmv h24, p3, z1.h // FMINNMV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.40 | V1UnitSVE01[10], V1UnitV[10] | FMINNMV_VPZ_S | fminnmv s30, p3, z9.s // FMINNMV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | FMINNMV_VPZ_D | fminnmv d18, p5, z8.d // FMINNMV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINPv2i16p | fminp h7, v10.2h // FMINP <Vh><d>, <Vn>.<Th> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINPv2i32p | fminp s17, v7.2s // FMINP <V><d>, <Vn>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINPv4f32 | fminp v25.4s, v2.4s, v15.4s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMINPv2f32 | fminp v14.2s, v28.2s, v15.2s // FMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP max/min, pairwise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMINVv4i16v | fminv h3, v30.4h // FMINV H<d>, <Vn>.4H \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 6 | 6 | 1.33 | V1UnitSVE01[3], V1UnitV[3] | FMINVv8i16v | fminv h29, v12.8h // FMINV H<d>, <Vn>.8H \\ ASIMD FP max/min, reduce, Q-form F16 \\ 1 6 6 1.33 V1UnitV[3]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMINVv4i32v | fminv s16, v19.4s // FMINV S<d>, <Vn>.4S \\ ASIMD FP max/min, reduce, F32 and D-form F16 \\ 1 4 4 2.0 V1UnitV[2]
+# CHECK-NEXT: 2 | 13 | 13 | 0.33 | V1UnitSVE01[12], V1UnitV[12] | FMINV_VPZ_H | fminv h15, p2, z25.h // FMINV H<d>, <Pg>, <Zn>.H \\ Floating point reduction, F16 \\ 2 13 13 0.33 V1UnitSVE01[12],V1UnitSVE01[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.40 | V1UnitSVE01[10], V1UnitV[10] | FMINV_VPZ_S | fminv s4, p0, z6.s // FMINV S<d>, <Pg>, <Zn>.S \\ Floating point reduction, F32 \\ 2 11 11 0.4 V1UnitSVE01[10],V1UnitSVE01[10]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | FMINV_VPZ_D | fminv d20, p1, z5.d // FMINV D<d>, <Pg>, <Zn>.D \\ Floating point reduction, F64 \\ 2 9 9 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv1i16_indexed | fmla h23, h24, v15.h[4] // FMLA <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv1i32_indexed | fmla s9, s20, v28.s[2] // FMLA S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv1i64_indexed | fmla d12, d20, v7.d[1] // FMLA D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv8i16_indexed | fmla v29.8h, v15.8h, v10.h[4] // FMLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv2i32_indexed | fmla v2.2s, v16.2s, v28.s[0] // FMLA <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv4i32_indexed | fmla v14.4s, v14.4s, v5.s[3] // FMLA <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv2i64_indexed | fmla v10.2d, v14.2d, v21.d[1] // FMLA <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLA_ZZZI_H | fmla z2.h, z4.h, z7.h[0] // FMLA <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLA_ZZZI_S | fmla z22.s, z15.s, z1.s[3] // FMLA <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLA_ZZZI_D | fmla z1.d, z30.d, z11.d[1] // FMLA <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv4f32 | fmla v1.4s, v24.4s, v12.4s // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLAv2f64 | fmla v30.2d, v16.2d, v6.2d // FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLA_ZPmZZ_S | fmla z6.s, p1/m, z24.s, z24.s // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv1i16_indexed | fmls h8, h14, v7.h[4] // FMLS <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv1i32_indexed | fmls s20, s17, v5.s[2] // FMLS S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv1i64_indexed | fmls d11, d24, v29.d[0] // FMLS D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv8i16_indexed | fmls v30.8h, v18.8h, v4.h[6] // FMLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv2i32_indexed | fmls v10.2s, v27.2s, v0.s[0] // FMLS <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv4i32_indexed | fmls v27.4s, v7.4s, v24.s[0] // FMLS <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv2i64_indexed | fmls v10.2d, v22.2d, v29.d[0] // FMLS <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLS_ZZZI_H | fmls z3.h, z31.h, z0.h[6] // FMLS <Zda>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLS_ZZZI_S | fmls z30.s, z8.s, z0.s[2] // FMLS <Zda>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLS_ZZZI_D | fmls z10.d, z20.d, z0.d[1] // FMLS <Zda>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv2f32 | fmls v6.2s, v3.2s, v12.2s // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMLSv8f16 | fmls v6.8h, v15.8h, v23.8h // FMLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLS_ZPmZZ_S | fmls z26.s, p5/m, z28.s, z26.s // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHWr | fmov w15, h31 // FMOV <Wd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVHXr | fmov x21, h14 // FMOV <Xd>, <Hn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVWHr | fmov h6, w5 // FMOV <Hd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVWSr | fmov s22, w0 // FMOV <Sd>, <Wn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVSWr | fmov w23, s30 // FMOV <Wd>, <Sn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVSWr | fmov w23, s30 // FMOV <Wd>, <Sn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVXHr | fmov h16, x27 // FMOV <Hd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | FMOVXDr | fmov d22, x12 // FMOV <Dd>, <Xn> \\ FP transfer, from gen to low half of vec reg \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | FMOVXDHighr | fmov v7.d[1], x8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXr | fmov x26, d29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXHighr | fmov x4, v26.d[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FCPY_ZPmI_S | fmov z2.s, p0/m, #0.50000000 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FDUP_ZI_S | fmov z14.s, #0.50000000 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVHr | fmov h18, h28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVSr | fmov s13, s23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVDr | fmov d27, d17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVHi | fmov h29, #0.50000000 // FMOV <Hd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVSi | fmov s22, #0.50000000 // FMOV <Sd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVDi | fmov d18, #0.50000000 // FMOV <Dd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f32_ns | fmov v12.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f32_ns | fmov v10.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FMOVv2f64_ns | fmov v0.2d, #0.50000000 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_D | mov z2.d, p2/m, #0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_S | mov z5.s, #0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMSB_ZPmZZ_S | fmsb z25.s, p5/m, z25.s, z29.s // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBHrrr | fmsub h25, h28, h12, h24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBSrrr | fmsub s31, s0, s23, s24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FMSUBDrrr | fmsub d12, d10, d20, d16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i16_indexed | fmul h18, h4, v7.h[3] // FMUL <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i32_indexed | fmul s17, s23, v30.s[2] // FMUL S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv1i64_indexed | fmul d27, d8, v10.d[1] // FMUL D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv4i16_indexed | fmul v10.4h, v2.4h, v7.h[5] // FMUL <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2i32_indexed | fmul v5.2s, v12.2s, v9.s[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv4i32_indexed | fmul v15.4s, v30.4s, v2.s[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2i64_indexed | fmul v11.2d, v31.2d, v24.d[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZPmI_H | fmul z17.h, p5/m, z17.h, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_H | fmul z27.h, z30.h, z0.h[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_S | fmul z6.s, z16.s, z1.s[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZI_D | fmul z4.d, z30.d, z2.d[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULHrr | fmul h28, h14, h3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULSrr | fmul s28, s16, s24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULDrr | fmul d19, d19, d0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2f64 | fmul v0.2d, v14.2d, v20.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULv2f64 | fmul v9.2d, v29.2d, v7.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZPmZ_D | fmul z22.d, p1/m, z22.d, z3.d // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMUL_ZZZ_S | fmul z19.s, z14.s, z26.s // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i16_indexed | fmulx h18, h17, v7.h[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i32_indexed | fmulx s23, s3, v3.s[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv1i64_indexed | fmulx d3, d13, v30.d[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv4i16_indexed | fmulx v28.4h, v25.4h, v15.h[1] // FMULX <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2i32_indexed | fmulx v3.2s, v22.2s, v23.s[3] // FMULX <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv4i32_indexed | fmulx v5.4s, v28.4s, v15.s[3] // FMULX <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2i64_indexed | fmulx v22.2d, v18.2d, v25.d[1] // FMULX <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULX16 | fmulx h20, h25, h0 // FMULX <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULX64 | fmulx d18, d19, d22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2f64 | fmulx v22.2d, v18.2d, v4.2d // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FMULXv2f32 | fmulx v16.2s, v4.2s, v27.2s // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FMULX_ZPmZ_H | fmulx z7.h, p5/m, z7.h, z21.h // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGHr | fneg h2, h9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGSr | fneg s11, s19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGDr | fneg d5, d16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGv2f64 | fneg v26.2d, v2.2d // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FNEGv2f32 | fneg v14.2s, v24.2s // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNEG_ZPmZ_S | fneg z16.s, p0/m, z25.s // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMAD_ZPmZZ_H | fnmad z6.h, p2/m, z14.h, z21.h // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDHrrr | fnmadd h3, h18, h31, h24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDSrrr | fnmadd s8, s18, s2, s14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMADDDrrr | fnmadd d19, d29, d28, d30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMLA_ZPmZZ_D | fnmla z15.d, p0/m, z8.d, z29.d // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMLS_ZPmZZ_D | fnmls z13.d, p0/m, z8.d, z12.d // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FNMSB_ZPmZZ_D | fnmsb z30.d, p7/m, z8.d, z9.d // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 1 4 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBHrrr | fnmsub h3, h29, h24, h17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBSrrr | fnmsub s29, s26, s17, s4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitV | FNMSUBDrrr | fnmsub d7, d13, d13, d4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULHrr | fnmul h3, h15, h7 // FNMUL <Hd>, <Hn>, <Hm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULSrr | fnmul s16, s11, s2 // FNMUL <Sd>, <Sn>, <Sm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitV | FNMULDrr | fnmul d12, d22, d14 // FNMUL <Dd>, <Dn>, <Dm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1f16 | frecpe h20, h8 // FRECPE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1i32 | frecpe s27, s7 // FRECPE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv1i64 | frecpe d2, d1 // FRECPE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv4f16 | frecpe v28.4h, v27.4h // FRECPE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRECPEv8f16 | frecpe v9.8h, v6.8h // FRECPE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPEv2f32 | frecpe v25.2s, v28.2s // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv4f32 | frecpe v21.4s, v18.4s // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRECPEv2f64 | frecpe v10.2d, v26.2d // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_H | frecpe z14.h, z0.h // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_S | frecpe z5.s, z16.s // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPE_ZZ_D | frecpe z27.d, z11.d // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPS16 | frecps h29, h19, h8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPS64 | frecps d25, d17, d12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPSv8f16 | frecps v12.8h, v25.8h, v4.8h // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRECPSv2f64 | frecps v7.2d, v29.2d, v18.2d // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV01 | FRECPS_ZZZ_S | frecps z11.s, z31.s, z1.s // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPXv1f16 | frecpx h18, h11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRECPXv1i32 | frecpx s13, s30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRECPX_ZPmZ_S | frecpx z15.s, p4/m, z12.s // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_H | frintn z30.h, p3/m, z31.h // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_S | frintn z17.s, p4/m, z23.s // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTN_ZPmZ_D | frintn z28.d, p1/m, z25.d // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_H | frinta z10.h, p6/m, z17.h // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_S | frinta z7.s, p4/m, z27.s // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTA_ZPmZ_D | frinta z17.d, p4/m, z17.d // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_H | frintm z26.h, p7/m, z0.h // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_S | frintm z6.s, p0/m, z28.s // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTM_ZPmZ_D | frintm z29.d, p4/m, z3.d // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_H | frintp z20.h, p4/m, z12.h // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_S | frintp z3.s, p7/m, z18.s // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTP_ZPmZ_D | frintp z28.d, p7/m, z4.d // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_H | frintz z27.h, p2/m, z12.h // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_S | frintz z12.s, p6/m, z3.s // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTZ_ZPmZ_D | frintz z12.d, p2/m, z31.d // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_H | frinti z16.h, p4/m, z9.h // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_S | frinti z18.s, p6/m, z27.s // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTI_ZPmZ_D | frinti z26.d, p2/m, z12.d // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_H | frintx z17.h, p0/m, z9.h // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_S | frintx z27.s, p7/m, z16.s // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRINTX_ZPmZ_D | frintx z21.d, p4/m, z23.d // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAHr | frinta h22, h10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTASr | frinta s15, s7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTADr | frinta d30, d10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv4f16 | frinta v24.4h, v10.4h // FRINTA <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTAv8f16 | frinta v5.8h, v3.8h // FRINTA <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAv2f32 | frinta v23.2s, v22.2s // FRINTA <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTAv4f32 | frinta v28.4s, v28.4s // FRINTA <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTAv2f64 | frinta v3.2d, v13.2d // FRINTA <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIHr | frinti h31, h14 // FRINTI <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTISr | frinti s23, s9 // FRINTI <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIDr | frinti d8, d12 // FRINTI <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv4f16 | frinti v6.4h, v10.4h // FRINTI <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTIv8f16 | frinti v22.8h, v7.8h // FRINTI <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIv2f32 | frinti v9.2s, v25.2s // FRINTI <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTIv4f32 | frinti v23.4s, v7.4s // FRINTI <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTIv2f64 | frinti v28.2d, v5.2d // FRINTI <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMHr | frintm h0, h21 // FRINTM <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMSr | frintm s22, s10 // FRINTM <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMDr | frintm d5, d30 // FRINTM <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv4f16 | frintm v3.4h, v8.4h // FRINTM <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTMv8f16 | frintm v19.8h, v26.8h // FRINTM <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMv2f32 | frintm v15.2s, v8.2s // FRINTM <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTMv4f32 | frintm v20.4s, v26.4s // FRINTM <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTMv2f64 | frintm v20.2d, v11.2d // FRINTM <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNHr | frintn h12, h3 // FRINTN <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNSr | frintn s27, s14 // FRINTN <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNDr | frintn d30, d17 // FRINTN <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv4f16 | frintn v27.4h, v4.4h // FRINTN <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTNv8f16 | frintn v17.8h, v19.8h // FRINTN <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNv2f32 | frintn v23.2s, v23.2s // FRINTN <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTNv4f32 | frintn v2.4s, v4.4s // FRINTN <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTNv2f64 | frintn v24.2d, v12.2d // FRINTN <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPHr | frintp h17, h31 // FRINTP <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPSr | frintp s14, s10 // FRINTP <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPDr | frintp d25, d13 // FRINTP <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv4f16 | frintp v22.4h, v25.4h // FRINTP <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTPv8f16 | frintp v18.8h, v11.8h // FRINTP <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPv2f32 | frintp v31.2s, v5.2s // FRINTP <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTPv4f32 | frintp v0.4s, v24.4s // FRINTP <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTPv2f64 | frintp v1.2d, v3.2d // FRINTP <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXHr | frintx h4, h5 // FRINTX <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXSr | frintx s10, s28 // FRINTX <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXDr | frintx d17, d19 // FRINTX <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv4f16 | frintx v24.4h, v25.4h // FRINTX <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTXv8f16 | frintx v1.8h, v27.8h // FRINTX <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXv2f32 | frintx v2.2s, v14.2s // FRINTX <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTXv4f32 | frintx v27.4s, v31.4s // FRINTX <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTXv2f64 | frintx v24.2d, v20.2d // FRINTX <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZHr | frintz h10, h29 // FRINTZ <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZSr | frintz s11, s23 // FRINTZ <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZDr | frintz d6, d11 // FRINTZ <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv4f16 | frintz v13.4h, v5.4h // FRINTZ <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRINTZv8f16 | frintz v20.8h, v21.8h // FRINTZ <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZv2f32 | frintz v15.2s, v19.2s // FRINTZ <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRINTZv4f32 | frintz v11.4s, v18.4s // FRINTZ <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRINTZv2f64 | frintz v12.2d, v22.2d // FRINTZ <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1f16 | frsqrte h23, h26 // FRSQRTE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1i32 | frsqrte s23, s5 // FRSQRTE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv1i64 | frsqrte d3, d11 // FRSQRTE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv4f16 | frsqrte v16.4h, v15.4h // FRSQRTE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | FRSQRTEv8f16 | frsqrte v14.8h, v0.8h // FRSQRTE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | FRSQRTEv2f32 | frsqrte v6.2s, v8.2s // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv4f32 | frsqrte v30.4s, v21.4s // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | FRSQRTEv2f64 | frsqrte v15.2d, v14.2d // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_H | frsqrte z6.h, z30.h // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 1 6 6 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_S | frsqrte z27.s, z15.s // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | FRSQRTE_ZZ_D | frsqrte z6.d, z17.d // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTS16 | frsqrts h28, h26, h1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTS32 | frsqrts s28, s1, s11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTSv4f16 | frsqrts v8.4h, v9.4h, v30.4h // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitV | FRSQRTSv4f32 | frsqrts v20.4s, v26.4s, v27.4s // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV01 | FRSQRTS_ZZZ_H | frsqrts z10.h, z25.h, z22.h // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 1 4 4 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FSCALE_ZPmZ_H | fscale z2.h, p0/m, z2.h, z21.h // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 7 | 7 | 1.00 | V1UnitV[2], V1UnitV02[2] | FSQRTHr | fsqrt h13, h24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 9 | 9 | 1.00 | V1UnitV[2], V1UnitV02[2] | FSQRTSr | fsqrt s20, s15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 16 | 16 | 0.25 | V1UnitV[8], V1UnitV02[8] | FSQRTDr | fsqrt d25, d21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
-# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitV[7], V1UnitV02[7] | FSQRTv4f16 | fsqrt v24.4h, v14.4h // FSQRT <Vd>.4H, <Vn>.4H \\ ASIMD FP square root, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
-# CHECK-NEXT: 1 | 13 | 13 | 0.15 | V1UnitV[13], V1UnitV02[13] | FSQRTv8f16 | fsqrt v12.8h, v3.8h // FSQRT <Vd>.8H, <Vn>.8H \\ ASIMD FP square root, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
-# CHECK-NEXT: 1 | 10 | 10 | 0.40 | V1UnitV[5], V1UnitV02[5] | FSQRTv2f32 | fsqrt v30.2s, v20.2s // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
-# CHECK-NEXT: 1 | 10 | 10 | 0.22 | V1UnitV[9], V1UnitV02[9] | FSQRTv4f32 | fsqrt v2.4s, v24.4s // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
-# CHECK-NEXT: 1 | 16 | 16 | 0.13 | V1UnitV[15], V1UnitV02[15] | FSQRTv2f64 | fsqrt v28.2d, v25.2d // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.13 V1UnitV02[15]
-# CHECK-NEXT: 1 | 13 | 13 | 0.08 | V1UnitV[12], V1UnitV0[12], V1UnitV01[12], V1UnitV02[12] | FSQRT_ZPmZ_H | fsqrt z13.h, p3/m, z11.h // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 1 13 13 0.08 V1UnitV0[12]
-# CHECK-NEXT: 1 | 10 | 10 | 0.11 | V1UnitV[9], V1UnitV0[9], V1UnitV01[9], V1UnitV02[9] | FSQRT_ZPmZ_S | fsqrt z2.s, p7/m, z0.s // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 1 10 10 0.11 V1UnitV0[9]
-# CHECK-NEXT: 1 | 16 | 16 | 0.07 | V1UnitV[14], V1UnitV0[14], V1UnitV01[14], V1UnitV02[14] | FSQRT_ZPmZ_D | fsqrt z17.d, p6/m, z17.d // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 1 16 16 0.07 V1UnitV0[14]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZPmI_D | fsub z12.d, p6/m, z12.d, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBHrr | fsub h20, h11, h18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBSrr | fsub s15, s4, s24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBDrr | fsub d25, d26, d4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBv8f16 | fsub v13.8h, v15.8h, v17.8h // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | FSUBv2f32 | fsub v1.2s, v31.2s, v27.2s // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZPmZ_S | fsub z24.s, p4/m, z24.s, z10.s // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUB_ZZZ_H | fsub z19.h, z8.h, z29.h // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUBR_ZPmI_H | fsubr z22.h, p7/m, z22.h, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | FSUBR_ZPmZ_S | fsubr z13.s, p2/m, z13.s, z4.s // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTMAD_ZZI_D | ftmad z19.d, z19.d, z6.d, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTSMUL_ZZZ_S | ftsmul z21.s, z0.s, z10.s // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | FTSSEL_ZZZ_D | ftssel z5.d, z0.d, z15.d // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | FMOVXDHighr | fmov v7.d[1], x8 // FMOV <Vd>.D[1], <Xn> \\ FP transfer, from gen to high half of vec reg \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXr | fmov x26, d29 // FMOV <Xd>, <Dn> \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | FMOVDXHighr | fmov x4, v26.d[1] // FMOV <Xd>, <Vn>.D[1] \\ FP transfer, from vec to gen reg \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FCPY_ZPmI_S | fmov z2.s, p0/m, #0.50000000 // FMOV <Zd>.<T>, <Pg>/M, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FDUP_ZI_S | fmov z14.s, #0.50000000 // FMOV <Zd>.<T>, #<const> \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVHr | fmov h18, h28 // FMOV <Hd>, <Hn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVSr | fmov s13, s23 // FMOV <Sd>, <Sn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVDr | fmov d27, d17 // FMOV <Dd>, <Dn> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVHi | fmov h29, #0.50000000 // FMOV <Hd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVSi | fmov s22, #0.50000000 // FMOV <Sd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVDi | fmov d18, #0.50000000 // FMOV <Dd>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVv2f32_ns | fmov v12.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVv2f32_ns | fmov v10.2s, #0.50000000 // FMOV <Vd>.<T>, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMOVv2f64_ns | fmov v0.2d, #0.50000000 // FMOV <Vd>.2D, #<imm> \\ ASIMD move, FP immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_D | mov z2.d, p2/m, #0 // FMOV <Zd>.<T>, <Pg>/M, #0.0 \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_S | mov z5.s, #0 // FMOV <Zd>.<T>, #0.0 \\ Floating point copy \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMSB_ZPmZZ_S | fmsb z25.s, p5/m, z25.s, z29.s // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMSUBHrrr | fmsub h25, h28, h12, h24 // FMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMSUBSrrr | fmsub s31, s0, s23, s24 // FMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FMSUBDrrr | fmsub d12, d10, d20, d16 // FMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv1i16_indexed | fmul h18, h4, v7.h[3] // FMUL <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv1i32_indexed | fmul s17, s23, v30.s[2] // FMUL S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv1i64_indexed | fmul d27, d8, v10.d[1] // FMUL D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv4i16_indexed | fmul v10.4h, v2.4h, v7.h[5] // FMUL <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv2i32_indexed | fmul v5.2s, v12.2s, v9.s[0] // FMUL <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv4i32_indexed | fmul v15.4s, v30.4s, v2.s[3] // FMUL <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv2i64_indexed | fmul v11.2d, v31.2d, v24.d[1] // FMUL <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMUL_ZPmI_H | fmul z17.h, p5/m, z17.h, #2.0 // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMUL_ZZZI_H | fmul z27.h, z30.h, z0.h[0] // FMUL <Zd>.H, <Zn>.H, <Zmhs>.H[<immh>] \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMUL_ZZZI_S | fmul z6.s, z16.s, z1.s[0] // FMUL <Zd>.S, <Zn>.S, <Zmhs>.S[<imms>] \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMUL_ZZZI_D | fmul z4.d, z30.d, z2.d[0] // FMUL <Zd>.D, <Zn>.D, <Zmd>.D[<immd>] \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULHrr | fmul h28, h14, h3 // FMUL <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULSrr | fmul s28, s16, s24 // FMUL <Sd>, <Sn>, <Sm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULDrr | fmul d19, d19, d0 // FMUL <Dd>, <Dn>, <Dm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv2f64 | fmul v0.2d, v14.2d, v20.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULv2f64 | fmul v9.2d, v29.2d, v7.2d // FMUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMUL_ZPmZ_D | fmul z22.d, p1/m, z22.d, z3.d // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMUL_ZZZ_S | fmul z19.s, z14.s, z26.s // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv1i16_indexed | fmulx h18, h17, v7.h[1] // FMULX <Hd>, <Hn>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv1i32_indexed | fmulx s23, s3, v3.s[2] // FMULX S<d>, S<n>, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv1i64_indexed | fmulx d3, d13, v30.d[0] // FMULX D<d>, D<n>, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv4i16_indexed | fmulx v28.4h, v25.4h, v15.h[1] // FMULX <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv2i32_indexed | fmulx v3.2s, v22.2s, v23.s[3] // FMULX <Vd>.2S, <Vn>.2S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv4i32_indexed | fmulx v5.4s, v28.4s, v15.s[3] // FMULX <Vd>.4S, <Vn>.4S, <Vm>.S[<indexs>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv2i64_indexed | fmulx v22.2d, v18.2d, v25.d[1] // FMULX <Vd>.2D, <Vn>.2D, <Vm>.D[<indexd>] \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULX16 | fmulx h20, h25, h0 // FMULX <Hd>, <Hn>, <Hm> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULX64 | fmulx d18, d19, d22 // FMULX <V><d>, <V><n>, <V><m> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv2f64 | fmulx v22.2d, v18.2d, v4.2d // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FMULXv2f32 | fmulx v16.2s, v4.2s, v27.2s // FMULX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMULX_ZPmZ_H | fmulx z7.h, p5/m, z7.h, z21.h // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNEGHr | fneg h2, h9 // FNEG <Hd>, <Hn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNEGSr | fneg s11, s19 // FNEG <Sd>, <Sn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNEGDr | fneg d5, d16 // FNEG <Dd>, <Dn> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNEGv2f64 | fneg v26.2d, v2.2d // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNEGv2f32 | fneg v14.2s, v24.2s // FNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD FP negate \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FNEG_ZPmZ_S | fneg z16.s, p0/m, z25.s // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FNMAD_ZPmZZ_H | fnmad z6.h, p2/m, z14.h, z21.h // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNMADDHrrr | fnmadd h3, h18, h31, h24 // FNMADD <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNMADDSrrr | fnmadd s8, s18, s2, s14 // FNMADD <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNMADDDrrr | fnmadd d19, d29, d28, d30 // FNMADD <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FNMLA_ZPmZZ_D | fnmla z15.d, p0/m, z8.d, z29.d // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FNMLS_ZPmZZ_D | fnmls z13.d, p0/m, z8.d, z12.d // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FNMSB_ZPmZZ_D | fnmsb z30.d, p7/m, z8.d, z9.d // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> \\ Floating point multiply accumulate \\ 2 4 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNMSUBHrrr | fnmsub h3, h29, h24, h17 // FNMSUB <Hd>, <Hn>, <Hm>, <Ha> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNMSUBSrrr | fnmsub s29, s26, s17, s4 // FNMSUB <Sd>, <Sn>, <Sm>, <Sa> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FNMSUBDrrr | fnmsub d7, d13, d13, d4 // FNMSUB <Dd>, <Dn>, <Dm>, <Da> \\ FP multiply accumulate \\ 1 4 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FNMULHrr | fnmul h3, h15, h7 // FNMUL <Hd>, <Hn>, <Hm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FNMULSrr | fnmul s16, s11, s2 // FNMUL <Sd>, <Sn>, <Sm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 4.00 | V1UnitSVE01, V1UnitV | FNMULDrr | fnmul d12, d22, d14 // FNMUL <Dd>, <Dn>, <Dm> \\ FP multiply \\ 1 3 3 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRECPEv1f16 | frecpe h20, h8 // FRECPE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRECPEv1i32 | frecpe s27, s7 // FRECPE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRECPEv1i64 | frecpe d2, d1 // FRECPE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPEv4f16 | frecpe v28.4h, v27.4h // FRECPE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRECPEv8f16 | frecpe v9.8h, v6.8h // FRECPE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRECPEv2f32 | frecpe v25.2s, v28.2s // FRECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPEv4f32 | frecpe v21.4s, v18.4s // FRECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPEv2f64 | frecpe v10.2d, v26.2d // FRECPE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPE_ZZ_H | frecpe z14.h, z0.h // FRECPE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPE_ZZ_S | frecpe z5.s, z16.s // FRECPE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPE_ZZ_D | frecpe z27.d, z11.d // FRECPE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRECPS16 | frecps h29, h19, h8 // FRECPS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRECPS64 | frecps d25, d17, d12 // FRECPS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRECPSv8f16 | frecps v12.8h, v25.8h, v4.8h // FRECPS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRECPSv2f64 | frecps v7.2d, v29.2d, v18.2d // FRECPS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FRECPS_ZZZ_S | frecps z11.s, z31.s, z1.s // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 2 4 4 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRECPXv1f16 | frecpx h18, h11 // FRECPX <Hd>, <Hn> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRECPXv1i32 | frecpx s13, s30 // FRECPX <V><d>, <V><n> \\ ASIMD reciprocal exponent \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRECPX_ZPmZ_S | frecpx z15.s, p4/m, z12.s // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Floating point reciprocal exponent \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTN_ZPmZ_H | frintn z30.h, p3/m, z31.h // FRINTN <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTN_ZPmZ_S | frintn z17.s, p4/m, z23.s // FRINTN <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTN_ZPmZ_D | frintn z28.d, p1/m, z25.d // FRINTN <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTA_ZPmZ_H | frinta z10.h, p6/m, z17.h // FRINTA <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTA_ZPmZ_S | frinta z7.s, p4/m, z27.s // FRINTA <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTA_ZPmZ_D | frinta z17.d, p4/m, z17.d // FRINTA <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTM_ZPmZ_H | frintm z26.h, p7/m, z0.h // FRINTM <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTM_ZPmZ_S | frintm z6.s, p0/m, z28.s // FRINTM <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTM_ZPmZ_D | frintm z29.d, p4/m, z3.d // FRINTM <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTP_ZPmZ_H | frintp z20.h, p4/m, z12.h // FRINTP <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTP_ZPmZ_S | frintp z3.s, p7/m, z18.s // FRINTP <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTP_ZPmZ_D | frintp z28.d, p7/m, z4.d // FRINTP <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTZ_ZPmZ_H | frintz z27.h, p2/m, z12.h // FRINTZ <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTZ_ZPmZ_S | frintz z12.s, p6/m, z3.s // FRINTZ <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTZ_ZPmZ_D | frintz z12.d, p2/m, z31.d // FRINTZ <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTI_ZPmZ_H | frinti z16.h, p4/m, z9.h // FRINTI <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTI_ZPmZ_S | frinti z18.s, p6/m, z27.s // FRINTI <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTI_ZPmZ_D | frinti z26.d, p2/m, z12.d // FRINTI <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTX_ZPmZ_H | frintx z17.h, p0/m, z9.h // FRINTX <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point round to integral, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTX_ZPmZ_S | frintx z27.s, p7/m, z16.s // FRINTX <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point round to integral, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTX_ZPmZ_D | frintx z21.d, p4/m, z23.d // FRINTX <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point round to integral, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTAHr | frinta h22, h10 // FRINTA <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTASr | frinta s15, s7 // FRINTA <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTADr | frinta d30, d10 // FRINTA <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTAv4f16 | frinta v24.4h, v10.4h // FRINTA <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTAv8f16 | frinta v5.8h, v3.8h // FRINTA <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTAv2f32 | frinta v23.2s, v22.2s // FRINTA <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTAv4f32 | frinta v28.4s, v28.4s // FRINTA <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTAv2f64 | frinta v3.2d, v13.2d // FRINTA <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTIHr | frinti h31, h14 // FRINTI <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTISr | frinti s23, s9 // FRINTI <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTIDr | frinti d8, d12 // FRINTI <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTIv4f16 | frinti v6.4h, v10.4h // FRINTI <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTIv8f16 | frinti v22.8h, v7.8h // FRINTI <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTIv2f32 | frinti v9.2s, v25.2s // FRINTI <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTIv4f32 | frinti v23.4s, v7.4s // FRINTI <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTIv2f64 | frinti v28.2d, v5.2d // FRINTI <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTMHr | frintm h0, h21 // FRINTM <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTMSr | frintm s22, s10 // FRINTM <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTMDr | frintm d5, d30 // FRINTM <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTMv4f16 | frintm v3.4h, v8.4h // FRINTM <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTMv8f16 | frintm v19.8h, v26.8h // FRINTM <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTMv2f32 | frintm v15.2s, v8.2s // FRINTM <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTMv4f32 | frintm v20.4s, v26.4s // FRINTM <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTMv2f64 | frintm v20.2d, v11.2d // FRINTM <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTNHr | frintn h12, h3 // FRINTN <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTNSr | frintn s27, s14 // FRINTN <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTNDr | frintn d30, d17 // FRINTN <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTNv4f16 | frintn v27.4h, v4.4h // FRINTN <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTNv8f16 | frintn v17.8h, v19.8h // FRINTN <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTNv2f32 | frintn v23.2s, v23.2s // FRINTN <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTNv4f32 | frintn v2.4s, v4.4s // FRINTN <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTNv2f64 | frintn v24.2d, v12.2d // FRINTN <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTPHr | frintp h17, h31 // FRINTP <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTPSr | frintp s14, s10 // FRINTP <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTPDr | frintp d25, d13 // FRINTP <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTPv4f16 | frintp v22.4h, v25.4h // FRINTP <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTPv8f16 | frintp v18.8h, v11.8h // FRINTP <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTPv2f32 | frintp v31.2s, v5.2s // FRINTP <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTPv4f32 | frintp v0.4s, v24.4s // FRINTP <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTPv2f64 | frintp v1.2d, v3.2d // FRINTP <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTXHr | frintx h4, h5 // FRINTX <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTXSr | frintx s10, s28 // FRINTX <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTXDr | frintx d17, d19 // FRINTX <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTXv4f16 | frintx v24.4h, v25.4h // FRINTX <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTXv8f16 | frintx v1.8h, v27.8h // FRINTX <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTXv2f32 | frintx v2.2s, v14.2s // FRINTX <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTXv4f32 | frintx v27.4s, v31.4s // FRINTX <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTXv2f64 | frintx v24.2d, v20.2d // FRINTX <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTZHr | frintz h10, h29 // FRINTZ <Hd>, <Hn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTZSr | frintz s11, s23 // FRINTZ <Sd>, <Sn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTZDr | frintz d6, d11 // FRINTZ <Dd>, <Dn> \\ FP round to integral \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTZv4f16 | frintz v13.4h, v5.4h // FRINTZ <Vd>.4H, <Vn>.4H \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRINTZv8f16 | frintz v20.8h, v21.8h // FRINTZ <Vd>.8H, <Vn>.8H \\ ASIMD FP round, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTZv2f32 | frintz v15.2s, v19.2s // FRINTZ <Vd>.2S, <Vn>.2S \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRINTZv4f32 | frintz v11.4s, v18.4s // FRINTZ <Vd>.4S, <Vn>.4S \\ ASIMD FP round, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRINTZv2f64 | frintz v12.2d, v22.2d // FRINTZ <Vd>.2D, <Vn>.2D \\ ASIMD FP round, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRSQRTEv1f16 | frsqrte h23, h26 // FRSQRTE <Hd>, <Hn> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRSQRTEv1i32 | frsqrte s23, s5 // FRSQRTE S<d>, S<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRSQRTEv1i64 | frsqrte d3, d11 // FRSQRTE D<d>, D<n> \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRSQRTEv4f16 | frsqrte v16.4h, v15.4h // FRSQRTE <Vd>.4H, <Vn>.4H \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | FRSQRTEv8f16 | frsqrte v14.8h, v0.8h // FRSQRTE <Vd>.8H, <Vn>.8H \\ ASIMD reciprocal and square root estimate, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | FRSQRTEv2f32 | frsqrte v6.2s, v8.2s // FRSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form F32 and scalar forms \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRSQRTEv4f32 | frsqrte v30.4s, v21.4s // FRSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRSQRTEv2f64 | frsqrte v15.2d, v14.2d // FRSQRTE <Vd>.2D, <Vn>.2D \\ ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRSQRTE_ZZ_H | frsqrte z6.h, z30.h // FRSQRTE <Zd>.H, <Zn>.H \\ Floating point reciprocal estimate, F16 \\ 2 6 6 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRSQRTE_ZZ_S | frsqrte z27.s, z15.s // FRSQRTE <Zd>.S, <Zn>.S \\ Floating point reciprocal estimate, F32 \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FRSQRTE_ZZ_D | frsqrte z6.d, z17.d // FRSQRTE <Zd>.D, <Zn>.D \\ Floating point reciprocal estimate, F64 \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRSQRTS16 | frsqrts h28, h26, h1 // FRSQRTS <Hd>, <Hn>, <Hm> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRSQRTS32 | frsqrts s28, s1, s11 // FRSQRTS <V><d>, <V><n>, <V><m> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRSQRTSv4f16 | frsqrts v8.4h, v9.4h, v30.4h // FRSQRTS <Vd>.<Th>, <Vn>.<Th>, <Vm>.<Th> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 4.00 | V1UnitSVE01, V1UnitV | FRSQRTSv4f32 | frsqrts v20.4s, v26.4s, v27.4s // FRSQRTS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD reciprocal step \\ 1 4 4 4.0 V1UnitV
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FRSQRTS_ZZZ_H | frsqrts z10.h, z25.h, z22.h // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point reciprocal step \\ 2 4 4 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FSCALE_ZPmZ_H | fscale z2.h, p0/m, z2.h, z21.h // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point multiply \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 7 | 7 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FSQRTHr | fsqrt h13, h24 // FSQRT <Hd>, <Hn> \\ FP square root, H-form \\ 1 7 7 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 9 | 9 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | FSQRTSr | fsqrt s20, s15 // FSQRT <Sd>, <Sn> \\ FP square root, S-form \\ 1 9 9 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 16 | 16 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | FSQRTDr | fsqrt d25, d21 // FSQRT <Dd>, <Dn> \\ FP square root, D-form \\ 1 16 16 0.25 V1UnitV02[8]
+# CHECK-NEXT: 1 | 7 | 7 | 0.29 | V1UnitSVE0[7], V1UnitSVE01[7], V1UnitV[7], V1UnitV02[7] | FSQRTv4f16 | fsqrt v24.4h, v14.4h // FSQRT <Vd>.4H, <Vn>.4H \\ ASIMD FP square root, D-form, F16 \\ 1 7 7 0.29 V1UnitV02[7]
+# CHECK-NEXT: 1 | 13 | 13 | 0.15 | V1UnitSVE0[13], V1UnitSVE01[13], V1UnitV[13], V1UnitV02[13] | FSQRTv8f16 | fsqrt v12.8h, v3.8h // FSQRT <Vd>.8H, <Vn>.8H \\ ASIMD FP square root, Q-form, F16 \\ 1 13 13 0.15 V1UnitV02[13]
+# CHECK-NEXT: 1 | 10 | 10 | 0.40 | V1UnitSVE0[5], V1UnitSVE01[5], V1UnitV[5], V1UnitV02[5] | FSQRTv2f32 | fsqrt v30.2s, v20.2s // FSQRT <Vd>.2S, <Vn>.2S \\ ASIMD FP square root, D-form, F32 \\ 1 10 10 0.4 V1UnitV02[5]
+# CHECK-NEXT: 1 | 10 | 10 | 0.22 | V1UnitSVE0[9], V1UnitSVE01[9], V1UnitV[9], V1UnitV02[9] | FSQRTv4f32 | fsqrt v2.4s, v24.4s // FSQRT <Vd>.4S, <Vn>.4S \\ ASIMD FP square root, Q-form, F32 \\ 1 10 10 0.22 V1UnitV02[9]
+# CHECK-NEXT: 1 | 16 | 16 | 0.13 | V1UnitSVE0[15], V1UnitSVE01[15], V1UnitV[15], V1UnitV02[15] | FSQRTv2f64 | fsqrt v28.2d, v25.2d // FSQRT <Vd>.2D, <Vn>.2D \\ ASIMD FP square root, Q-form, F64 \\ 1 16 16 0.13 V1UnitV02[15]
+# CHECK-NEXT: 2 | 13 | 13 | 0.08 | V1UnitSVE0[24], V1UnitSVE01[24], V1UnitV[24], V1UnitV02[24] | FSQRT_ZPmZ_H | fsqrt z13.h, p3/m, z11.h // FSQRT <Zd>.H, <Pg>/M, <Zn>.H \\ Floating point square root, F16 \\ 2 13 13 0.08 V1UnitSVE0[24],V1UnitSVE0[24]
+# CHECK-NEXT: 2 | 10 | 10 | 0.11 | V1UnitSVE0[18], V1UnitSVE01[18], V1UnitV[18], V1UnitV02[18] | FSQRT_ZPmZ_S | fsqrt z2.s, p7/m, z0.s // FSQRT <Zd>.S, <Pg>/M, <Zn>.S \\ Floating point square root, F32 \\ 2 10 10 0.11 V1UnitSVE0[18],V1UnitSVE0[18]
+# CHECK-NEXT: 2 | 16 | 16 | 0.07 | V1UnitSVE0[28], V1UnitSVE01[28], V1UnitV[28], V1UnitV02[28] | FSQRT_ZPmZ_D | fsqrt z17.d, p6/m, z17.d // FSQRT <Zd>.D, <Pg>/M, <Zn>.D \\ Floating point square root F64 \\ 2 16 16 0.07 V1UnitSVE0[28],V1UnitSVE0[28]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FSUB_ZPmI_D | fsub z12.d, p6/m, z12.d, #1.0 // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FSUBHrr | fsub h20, h11, h18 // FSUB <Hd>, <Hn>, <Hm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FSUBSrr | fsub s15, s4, s24 // FSUB <Sd>, <Sn>, <Sm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FSUBDrr | fsub d25, d26, d4 // FSUB <Dd>, <Dn>, <Dm> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FSUBv8f16 | fsub v13.8h, v15.8h, v17.8h // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | FSUBv2f32 | fsub v1.2s, v31.2s, v27.2s // FSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD FP arith, normal \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FSUB_ZPmZ_S | fsub z24.s, p4/m, z24.s, z10.s // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FSUB_ZZZ_H | fsub z19.h, z8.h, z29.h // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FSUBR_ZPmI_H | fsubr z22.h, p7/m, z22.h, #0.5 // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FSUBR_ZPmZ_S | fsubr z13.s, p2/m, z13.s, z4.s // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Floating point arithmetic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FTMAD_ZZI_D | ftmad z19.d, z19.d, z6.d, #3 // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FTSMUL_ZZZ_S | ftsmul z21.s, z0.s, z10.s // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 3 | 3 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FTSSEL_ZZZ_D | ftssel z5.d, z0.d, z15.d // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Floating point trigonometric \\ 2 3 3 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | hint #9 // HINT #<imm> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HLT | hlt #0x7a67 // HLT #<imm> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HVC | hvc #0xecb9 // HVC #<imm> \\ No description \\ No scheduling info
@@ -4948,50 +4951,50 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x29 // INCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x2, vl64 // INCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCW_XPiI | incw x2, vl8 // INCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCD_ZPiI | incd z24.d // INCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCD_ZPiI | incd z23.d, vl8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCD_ZPiI | incd z20.d, vl2, mul #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCH_ZPiI | inch z29.h // INCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCH_ZPiI | inch z28.h, vl16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCH_ZPiI | inch z29.h, vl16, mul #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCW_ZPiI | incw z17.s // INCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCW_ZPiI | incw z31.s, mul3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | INCW_ZPiI | incw z12.s, vl4, mul #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCD_ZPiI | incd z24.d // INCD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCD_ZPiI | incd z23.d, vl8 // INCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCD_ZPiI | incd z20.d, vl2, mul #11 // INCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCH_ZPiI | inch z29.h // INCH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCH_ZPiI | inch z28.h, vl16 // INCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCH_ZPiI | inch z29.h, vl16, mul #13 // INCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCW_ZPiI | incw z17.s // INCW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCW_ZPiI | incw z31.s, mul3 // INCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | INCW_ZPiI | incw z12.s, vl4, mul #5 // INCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | INCP_XP_H | incp x7, p0.h // INCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | INCP_ZP_D | incp z2.d, p6.d // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_IR_B | index z8.b, #15, w14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_IR_H | index z14.h, #11, w10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_IR_S | index z17.s, #14, w21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_IR_D | index z5.d, #11, x15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_B | index z16.b, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_H | index z13.h, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_II_S | index z20.s, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_II_D | index z13.d, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 1 5 5 0.5 V1UnitV0[2]
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RI_B | index z28.b, w27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RI_H | index z13.h, w28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RI_S | index z22.s, w7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_RI_D | index z0.d, x25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RR_B | index z6.b, w24, w8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RR_H | index z20.h, w4, w7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | INDEX_RR_S | index z10.s, w2, w19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 2 7 7 1.0 V1UnitM0,V1UnitV0
-# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | INDEX_RR_D | index z2.d, x23, x7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 2 8 8 0.5 V1UnitM0[2],V1UnitV0[2]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi8lane | mov v15.b[7], v6.b[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi16lane | mov v17.h[1], v3.h[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi32lane | mov v4.s[1], v7.s[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi64lane | mov v22.d[1], v25.d[1] // INS <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi8gpr | mov v14.b[3], w12 // INS <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi16gpr | mov v25.h[2], w14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi32gpr | mov v14.s[1], w29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi64gpr | mov v19.d[1], x27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INSR_ZV_D | insr z4.d, d0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | INSR_ZR_D | insr z4.d, x14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE01[2], V1UnitV[2] | INCP_ZP_D | incp z2.d, p6.d // INCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_IR_B | index z8.b, #15, w14 // INDEX <Zd>.B, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_IR_H | index z14.h, #11, w10 // INDEX <Zd>.H, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_IR_S | index z17.s, #14, w21 // INDEX <Zd>.S, #<imm>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_IR_D | index z5.d, #11, x15 // INDEX <Zd>.D, #<imm>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 3 8 8 0.5 V1UnitM0[2],V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_II_B | index z16.b, #-2, #0 // INDEX <Zd>.B, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_II_H | index z13.h, #13, #2 // INDEX <Zd>.H, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_II_S | index z20.s, #6, #1 // INDEX <Zd>.S, #<imm1>, #<imm2> \\ Horizontal operations, B, H, S form, imm, imm \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | INDEX_II_D | index z13.d, #-15, #0 // INDEX <Zd>.D, #<imm1>, #<imm2> \\ Horizontal operations, D form, imm, imm \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RI_B | index z28.b, w27, #1 // INDEX <Zd>.B, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RI_H | index z13.h, w28, #-5 // INDEX <Zd>.H, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RI_S | index z22.s, w7, #8 // INDEX <Zd>.S, W<n>, #<imm> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RI_D | index z0.d, x25, #-8 // INDEX <Zd>.D, X<n>, #<imm> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 3 8 8 0.5 V1UnitM0[2],V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RR_B | index z6.b, w24, w8 // INDEX <Zd>.B, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RR_H | index z20.h, w4, w7 // INDEX <Zd>.H, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 7 | 7 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RR_S | index z10.s, w2, w19 // INDEX <Zd>.S, W<n>, W<m> \\ Horizontal operations, B, H, S form, scalar, imm/ scalar/ imm, scalar \\ 3 7 7 1.0 V1UnitM0,V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 3 | 8 | 8 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | INDEX_RR_D | index z2.d, x23, x7 // INDEX <Zd>.D, X<n>, X<m> \\ Horizontal operations, D form, scalar, imm/ scalar/ imm, scalar \\ 3 8 8 0.5 V1UnitM0[2],V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi8lane | mov v15.b[7], v6.b[15] // INS <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi16lane | mov v17.h[1], v3.h[2] // INS <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi32lane | mov v4.s[1], v7.s[0] // INS <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi64lane | mov v22.d[1], v25.d[1] // INS <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi8gpr | mov v14.b[3], w12 // INS <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi16gpr | mov v25.h[2], w14 // INS <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi32gpr | mov v14.s[1], w29 // INS <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi64gpr | mov v19.d[1], x27 // INS <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | INSR_ZV_D | insr z4.d, d0 // INSR <Zdn>.<T>, <V><m> \\ Extract/insert operation, SIMD and FP scalar form \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | INSR_ZR_D | insr z4.d, x14 // INSR <Zdn>.<T>, <R><m> \\ Extract/insert operation, scalar \\ 3 6 6 1.0 V1UnitSVE1[2],V1UnitSVE1[2],V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb // ISB \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb // ISB <option> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | ISB | isb #1 // ISB #<imm> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTA_VPZ_B | lasta b3, p1, z3.b // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTA_RPZ_B | lasta w16, p0, z10.b // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTB_VPZ_D | lastb d3, p1, z17.d // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 1 3 3 1.0 V1UnitV1
-# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LASTB_RPZ_D | lastb x4, p3, z31.d // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 2 6 6 1.0 V1UnitV1,V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LASTA_VPZ_B | lasta b3, p1, z3.b // LASTA <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LASTA_RPZ_B | lasta w16, p0, z10.b // LASTA <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 3 6 6 1.0 V1UnitSVE1[2],V1UnitSVE1[2],V1UnitM0
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LASTB_VPZ_D | lastb d3, p1, z17.d // LASTB <V><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, SIMD and FP scalar form \\ 2 3 3 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LASTB_RPZ_D | lastb x4, p3, z31.d // LASTB <R><d>, <Pg>, <Zn>.<T> \\ Extract/insert operation, scalar \\ 3 6 6 1.0 V1UnitSVE1[2],V1UnitSVE1[2],V1UnitM0
# CHECK-NEXT: 1 | 6 | 6 | 3.00 | V1UnitL | LD1Onev8b | ld1 { v23.8b }, [x11] // LD1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 1 6 6 3.0 V1UnitL
# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8b_POST | ld1 { v25.8b }, [x30], #8 // LD1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
# CHECK-NEXT: 2 | 6 | 6 | 3.00 | V1UnitI, V1UnitL | LD1Onev8b_POST | ld1 { v14.8b }, [x1], x26 // LD1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 1 reg, D-form \\ 2 6 6 3.0 V1UnitL,V1UnitI
@@ -5088,18 +5091,18 @@ test:
# CHECK-NEXT: 1 | 7 | 7 | 0.75 | V1UnitL[4] | LD1Fourv2d | ld1 { v18.2d, v19.2d, v20.2d, v21.2d }, [x6] // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 1 7 7 0.75 V1UnitL[4]
# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv2d_POST | ld1 { v3.2d, v4.2d, v5.2d, v6.2d }, [x3], #64 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
# CHECK-NEXT: 2 | 7 | 7 | 0.75 | V1UnitI, V1UnitL[4] | LD1Fourv2d_POST | ld1 { v6.2d, v7.2d, v8.2d, v9.2d }, [x17], x18 // LD1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, multiple, 4 reg, Q-form \\ 2 7 7 0.75 V1UnitL[4],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i8 | ld1 { v18.b }[3], [x23] // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i16 | ld1 { v18.h }[3], [x1] // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i32 | ld1 { v8.s }[0], [x24] // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1i64 | ld1 { v11.d }[0], [x13] // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i8_POST | ld1 { v23.b }[1], [x13], #1 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i8_POST | ld1 { v10.b }[9], [x25], x14 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i16_POST | ld1 { v6.h }[2], [x26], #2 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i16_POST | ld1 { v30.h }[6], [x27], x3 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i32_POST | ld1 { v5.s }[1], [x10], #4 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i32_POST | ld1 { v13.s }[3], [x6], x24 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i64_POST | ld1 { v26.d }[1], [x28], #8 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1i64_POST | ld1 { v1.d }[1], [x20], x30 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1i8 | ld1 { v18.b }[3], [x23] // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1i16 | ld1 { v18.h }[3], [x1] // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1i32 | ld1 { v8.s }[0], [x24] // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1i64 | ld1 { v11.d }[0], [x13] // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 1 element, one lane, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i8_POST | ld1 { v23.b }[1], [x13], #1 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i8_POST | ld1 { v10.b }[9], [x25], x14 // LD1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i16_POST | ld1 { v6.h }[2], [x26], #2 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i16_POST | ld1 { v30.h }[6], [x27], x3 // LD1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i32_POST | ld1 { v5.s }[1], [x10], #4 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i32_POST | ld1 { v13.s }[3], [x6], x24 // LD1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i64_POST | ld1 { v26.d }[1], [x28], #8 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1i64_POST | ld1 { v1.d }[1], [x20], x30 // LD1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, one lane, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_IMM | ld1b { z20.b }, p1/z, [x25] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_IMM | ld1b { z10.b }, p1/z, [x16, #-1, mul vl] // LD1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H_IMM | ld1b { z31.h }, p1/z, [x4] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
@@ -5112,17 +5115,17 @@ test:
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_H | ld1b { z26.h }, p5/z, [x5, x21] // LD1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_S | ld1b { z22.s }, p3/z, [x16, x12] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1B_D | ld1b { z7.d }, p5/z, [x18, x12] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_D_UXTW | ld1b { z2.d }, p0/z, [x15, z18.d, uxtw] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_S_SXTW | ld1b { z20.s }, p6/z, [x2, z0.s, sxtw] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_D | ld1b { z15.d }, p4/z, [x23, z9.d] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1B_S_IMM | ld1b { z8.s }, p4/z, [z25.s, #22] // LD1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1B_D_IMM | ld1b { z13.d }, p2/z, [z3.d, #30] // LD1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1D_SXTW_SCALED | ld1d { z21.d }, p1/z, [x24, z31.d, sxtw #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D_SXTW | ld1d { z7.d }, p0/z, [x13, z15.d, sxtw] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1D_SCALED | ld1d { z14.d }, p1/z, [x26, z27.d, lsl #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D | ld1d { z30.d }, p7/z, [x14, z16.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D_IMM | ld1d { z22.d }, p1/z, [z15.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1D_IMM | ld1d { z8.d }, p4/z, [z12.d, #200] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1B_D_UXTW | ld1b { z2.d }, p0/z, [x15, z18.d, uxtw] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1B_S_SXTW | ld1b { z20.s }, p6/z, [x2, z0.s, sxtw] // LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1B_D | ld1b { z15.d }, p4/z, [x23, z9.d] // LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1B_S_IMM | ld1b { z8.s }, p4/z, [z25.s, #22] // LD1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1B_D_IMM | ld1b { z13.d }, p2/z, [z3.d, #30] // LD1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1D_SXTW_SCALED | ld1d { z21.d }, p1/z, [x24, z31.d, sxtw #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1D_SXTW | ld1d { z7.d }, p0/z, [x13, z15.d, sxtw] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1D_SCALED | ld1d { z14.d }, p1/z, [x26, z27.d, lsl #3] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1D | ld1d { z30.d }, p7/z, [x14, z16.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1D_IMM | ld1d { z22.d }, p1/z, [z15.d] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1D_IMM | ld1d { z8.d }, p4/z, [z12.d, #200] // LD1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_IMM | ld1h { z3.h }, p2/z, [x21] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_IMM | ld1h { z15.h }, p0/z, [x25, #-3, mul vl] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_S_IMM | ld1h { z9.s }, p1/z, [x17] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
@@ -5132,40 +5135,40 @@ test:
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H | ld1h { z26.h }, p5/z, [x10, x19, lsl #1] // LD1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_S | ld1h { z29.s }, p7/z, [x23, x11, lsl #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1H_D | ld1h { z2.d }, p5/z, [x30, x9, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_S_SXTW_SCALED | ld1h { z14.s }, p7/z, [x14, z28.s, sxtw #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_D_SXTW_SCALED | ld1h { z28.d }, p7/z, [x8, z9.d, sxtw #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D_UXTW | ld1h { z16.d }, p5/z, [x7, z9.d, uxtw] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_S_UXTW | ld1h { z27.s }, p4/z, [x4, z7.s, uxtw] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_D_SCALED | ld1h { z6.d }, p7/z, [x30, z26.d, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D | ld1h { z11.d }, p2/z, [x20, z25.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_S_IMM | ld1h { z6.s }, p7/z, [z31.s] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1H_S_IMM | ld1h { z1.s }, p3/z, [z12.s, #8] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D_IMM | ld1h { z7.d }, p7/z, [z9.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1H_D_IMM | ld1h { z13.d }, p3/z, [z5.d, #8] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv8b | ld1r { v8.8b }, [x23] // LD1R { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8b_POST | ld1r { v4.8b }, [x25], #1 // LD1R { <Vt>.8B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8b_POST | ld1r { v14.8b }, [x24], x14 // LD1R { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv16b | ld1r { v8.16b }, [x24] // LD1R { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv16b_POST | ld1r { v21.16b }, [x30], #1 // LD1R { <Vt>.16B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv16b_POST | ld1r { v1.16b }, [x3], x9 // LD1R { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv4h | ld1r { v28.4h }, [x9] // LD1R { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4h_POST | ld1r { v10.4h }, [x27], #2 // LD1R { <Vt>.4H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4h_POST | ld1r { v12.4h }, [x8], x20 // LD1R { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv8h | ld1r { v3.8h }, [x16] // LD1R { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8h_POST | ld1r { v27.8h }, [x18], #2 // LD1R { <Vt>.8H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv8h_POST | ld1r { v20.8h }, [x20], x4 // LD1R { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv2s | ld1r { v10.2s }, [x20] // LD1R { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2s_POST | ld1r { v28.2s }, [x8], #4 // LD1R { <Vt>.2S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2s_POST | ld1r { v4.2s }, [x0], x12 // LD1R { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv4s | ld1r { v11.4s }, [x3] // LD1R { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4s_POST | ld1r { v18.4s }, [x3], #4 // LD1R { <Vt>.4S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv4s_POST | ld1r { v2.4s }, [x4], x1 // LD1R { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv1d | ld1r { v3.1d }, [x15] // LD1R { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv1d_POST | ld1r { v16.1d }, [x2], #8 // LD1R { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv1d_POST | ld1r { v24.1d }, [x21], x3 // LD1R { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD1Rv2d | ld1r { v18.2d }, [x0] // LD1R { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x18], #8 // LD1R { <Vt>.2D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x16], x28 // LD1R { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1H_S_SXTW_SCALED | ld1h { z14.s }, p7/z, [x14, z28.s, sxtw #1] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1H_D_SXTW_SCALED | ld1h { z28.d }, p7/z, [x8, z9.d, sxtw #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1H_D_UXTW | ld1h { z16.d }, p5/z, [x7, z9.d, uxtw] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1H_S_UXTW | ld1h { z27.s }, p4/z, [x4, z7.s, uxtw] // LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1H_D_SCALED | ld1h { z6.d }, p7/z, [x30, z26.d, lsl #1] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1H_D | ld1h { z11.d }, p2/z, [x20, z25.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1H_S_IMM | ld1h { z6.s }, p7/z, [z31.s] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1H_S_IMM | ld1h { z1.s }, p3/z, [z12.s, #8] // LD1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1H_D_IMM | ld1h { z7.d }, p7/z, [z9.d] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1H_D_IMM | ld1h { z13.d }, p3/z, [z5.d, #8] // LD1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv8b | ld1r { v8.8b }, [x23] // LD1R { <Vt>.8B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv8b_POST | ld1r { v4.8b }, [x25], #1 // LD1R { <Vt>.8B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv8b_POST | ld1r { v14.8b }, [x24], x14 // LD1R { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv16b | ld1r { v8.16b }, [x24] // LD1R { <Vt>.16B }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv16b_POST | ld1r { v21.16b }, [x30], #1 // LD1R { <Vt>.16B }, [<Xn|SP>], #1 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv16b_POST | ld1r { v1.16b }, [x3], x9 // LD1R { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv4h | ld1r { v28.4h }, [x9] // LD1R { <Vt>.4H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv4h_POST | ld1r { v10.4h }, [x27], #2 // LD1R { <Vt>.4H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv4h_POST | ld1r { v12.4h }, [x8], x20 // LD1R { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv8h | ld1r { v3.8h }, [x16] // LD1R { <Vt>.8H }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv8h_POST | ld1r { v27.8h }, [x18], #2 // LD1R { <Vt>.8H }, [<Xn|SP>], #2 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv8h_POST | ld1r { v20.8h }, [x20], x4 // LD1R { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv2s | ld1r { v10.2s }, [x20] // LD1R { <Vt>.2S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv2s_POST | ld1r { v28.2s }, [x8], #4 // LD1R { <Vt>.2S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv2s_POST | ld1r { v4.2s }, [x0], x12 // LD1R { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv4s | ld1r { v11.4s }, [x3] // LD1R { <Vt>.4S }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv4s_POST | ld1r { v18.4s }, [x3], #4 // LD1R { <Vt>.4S }, [<Xn|SP>], #4 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv4s_POST | ld1r { v2.4s }, [x4], x1 // LD1R { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv1d | ld1r { v3.1d }, [x15] // LD1R { <Vt>.1D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, D-form, D \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv1d_POST | ld1r { v16.1d }, [x2], #8 // LD1R { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv1d_POST | ld1r { v24.1d }, [x21], x3 // LD1R { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, D-form, D \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv2d | ld1r { v18.2d }, [x0] // LD1R { <Vt>.2D }, [<Xn|SP>] \\ ASIMD load, 1 element, all lanes, Q-form \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x18], #8 // LD1R { <Vt>.2D }, [<Xn|SP>], #8 \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD1Rv2d_POST | ld1r { v8.2d }, [x16], x28 // LD1R { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 1 element, all lanes, Q-form \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_IMM | ld1rb { z13.b }, p0/z, [x9] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_IMM | ld1rb { z30.b }, p6/z, [x21, #28] // LD1RB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1RB_H_IMM | ld1rb { z10.h }, p1/z, [x9] // LD1RB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load broadcast, scalar + imm \\ 1 6 6 2.0 V1UnitL01
@@ -5219,251 +5222,251 @@ test:
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_H | ld1sb { z3.h }, p5/z, [x10, x23] // LD1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_S | ld1sb { z16.s }, p7/z, [x27, x16] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SB_D | ld1sb { z13.d }, p7/z, [x28, x18] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_D_UXTW | ld1sb { z30.d }, p6/z, [x22, z27.d, uxtw] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_S_UXTW | ld1sb { z23.s }, p5/z, [x17, z10.s, uxtw] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_D | ld1sb { z23.d }, p2/z, [x28, z10.d] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SB_S_IMM | ld1sb { z14.s }, p4/z, [z18.s, #24] // LD1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SB_D_IMM | ld1sb { z5.d }, p0/z, [z25.d, #31] // LD1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SB_D_UXTW | ld1sb { z30.d }, p6/z, [x22, z27.d, uxtw] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SB_S_UXTW | ld1sb { z23.s }, p5/z, [x17, z10.s, uxtw] // LD1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SB_D | ld1sb { z23.d }, p2/z, [x28, z10.d] // LD1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SB_S_IMM | ld1sb { z14.s }, p4/z, [z18.s, #24] // LD1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SB_D_IMM | ld1sb { z5.d }, p0/z, [z25.d, #31] // LD1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_S_IMM | ld1sh { z8.s }, p3/z, [x21] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_S_IMM | ld1sh { z29.s }, p4/z, [x11, #-4, mul vl] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_D_IMM | ld1sh { z13.d }, p6/z, [x18] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SH_D_IMM | ld1sh { z19.d }, p2/z, [x29, #-3, mul vl] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1SH_S | ld1sh { z28.s }, p0/z, [x6, x28, lsl #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LD1SH_D | ld1sh { z26.d }, p0/z, [x7, x12, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_S_UXTW_SCALED | ld1sh { z22.s }, p3/z, [x7, z1.s, uxtw #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_D_SXTW_SCALED | ld1sh { z3.d }, p6/z, [x11, z14.d, sxtw #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_D_SXTW | ld1sh { z27.d }, p3/z, [x19, z23.d, sxtw] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_S_SXTW | ld1sh { z12.s }, p5/z, [x27, z13.s, sxtw] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_D_SCALED | ld1sh { z9.d }, p0/z, [x22, z8.d, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_D | ld1sh { z22.d }, p0/z, [x27, z12.d] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SH_S_IMM | ld1sh { z1.s }, p2/z, [z9.s, #44] // LD1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SH_D_IMM | ld1sh { z11.d }, p5/z, [z30.d, #34] // LD1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SH_S_UXTW_SCALED | ld1sh { z22.s }, p3/z, [x7, z1.s, uxtw #1] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SH_D_SXTW_SCALED | ld1sh { z3.d }, p6/z, [x11, z14.d, sxtw #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SH_D_SXTW | ld1sh { z27.d }, p3/z, [x19, z23.d, sxtw] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SH_S_SXTW | ld1sh { z12.s }, p5/z, [x27, z13.s, sxtw] // LD1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SH_D_SCALED | ld1sh { z9.d }, p0/z, [x22, z8.d, lsl #1] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SH_D | ld1sh { z22.d }, p0/z, [x27, z12.d] // LD1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SH_S_IMM | ld1sh { z1.s }, p2/z, [z9.s, #44] // LD1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SH_D_IMM | ld1sh { z11.d }, p5/z, [z30.d, #34] // LD1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D_IMM | ld1sw { z7.d }, p1/z, [x19] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D_IMM | ld1sw { z28.d }, p1/z, [x26, #4, mul vl] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LD1SW_D | ld1sw { z26.d }, p4/z, [x20, x17, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous load, scalar + scalar \\ 1 6 6 2.0 V1UnitL01
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SW_D_SXTW_SCALED | ld1sw { z22.d }, p1/z, [x14, z23.d, sxtw #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D_SXTW | ld1sw { z4.d }, p3/z, [x20, z15.d, sxtw] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1SW_D_SCALED | ld1sw { z1.d }, p4/z, [x20, z23.d, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D | ld1sw { z2.d }, p7/z, [x4, z0.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D_IMM | ld1sw { z12.d }, p7/z, [z21.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1SW_D_IMM | ld1sw { z27.d }, p3/z, [z10.d, #24] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_SXTW_SCALED | ld1w { z9.s }, p0/z, [x18, z9.s, sxtw #2] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_D_UXTW_SCALED | ld1w { z14.d }, p5/z, [x26, z2.d, uxtw #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D_UXTW | ld1w { z31.d }, p6/z, [x17, z2.d, uxtw] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_SXTW | ld1w { z14.s }, p2/z, [x18, z28.s, sxtw] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_D_SCALED | ld1w { z13.d }, p3/z, [x5, z11.d, lsl #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D | ld1w { z24.d }, p3/z, [x2, z17.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_IMM | ld1w { z4.s }, p0/z, [z1.s] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLD1W_IMM | ld1w { z17.s }, p6/z, [z26.s, #60] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D_IMM | ld1w { z31.d }, p7/z, [z22.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLD1W_D_IMM | ld1w { z2.d }, p3/z, [z6.d, #116] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD2Twov8b | ld2 { v13.8b, v14.8b }, [x4] // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov8b_POST | ld2 { v20.8b, v21.8b }, [x11], #16 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov8b_POST | ld2 { v13.8b, v14.8b }, [x4], x7 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov16b | ld2 { v26.16b, v27.16b }, [x16] // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v15.16b, v16.16b }, [x3], #32 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v24.16b, v25.16b }, [x7], x30 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD2Twov4h | ld2 { v0.4h, v1.4h }, [x21] // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x30], #16 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x22], x1 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov8h | ld2 { v8.8h, v9.8h }, [x28] // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v14.8h, v15.8h }, [x19], #32 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v28.8h, v29.8h }, [x26], x7 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitV | LD2Twov2s | ld2 { v2.2s, v3.2s }, [x16] // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov2s_POST | ld2 { v23.2s, v24.2s }, [x5], #16 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitV | LD2Twov2s_POST | ld2 { v22.2s, v23.2s }, [x11], x12 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov4s | ld2 { v22.4s, v23.4s }, [x4] // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v27.4s, v28.4s }, [x18], #32 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v22.4s, v23.4s }, [x26], x29 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Twov2d | ld2 { v22.2d, v23.2d }, [x17] // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v12.2d, v13.2d }, [x19], #32 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v6.2d, v7.2d }, [x11], x24 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i8 | ld2 { v29.b, v30.b }[3], [x1] // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i16 | ld2 { v23.h, v24.h }[7], [x14] // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i32 | ld2 { v26.s, v27.s }[1], [x17] // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2i64 | ld2 { v1.d, v2.d }[0], [x10] // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i8_POST | ld2 { v20.b, v21.b }[9], [x24], #2 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i8_POST | ld2 { v29.b, v30.b }[6], [x18], x19 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i16_POST | ld2 { v2.h, v3.h }[3], [x12], #4 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i16_POST | ld2 { v11.h, v12.h }[3], [x18], x17 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i32_POST | ld2 { v15.s, v16.s }[1], [x7], #8 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i32_POST | ld2 { v29.s, v30.s }[1], [x12], x0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i64_POST | ld2 { v1.d, v2.d }[1], [x3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2i64_POST | ld2 { v10.d, v11.d }[1], [x18], x27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B_IMM | ld2b { z9.b, z10.b }, p2/z, [x22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B_IMM | ld2b { z28.b, z29.b }, p3/z, [x22, #4, mul vl] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2B | ld2b { z26.b, z27.b }, p1/z, [x3, x12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D_IMM | ld2d { z12.d, z13.d }, p5/z, [x24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D_IMM | ld2d { z22.d, z23.d }, p2/z, [x21, #-2, mul vl] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2D | ld2d { z22.d, z23.d }, p6/z, [x14, x4, lsl #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H_IMM | ld2h { z5.h, z6.h }, p5/z, [x20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2H_IMM | ld2h { z27.h, z28.h }, p7/z, [x11, #14, mul vl] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 3 | 10 | 10 | 1.00 | V1UnitI[2], V1UnitL[2], V1UnitL01[2], V1UnitS[2], V1UnitV[2], V1UnitV01[2] | LD2H | ld2h { z18.h, z19.h }, p3/z, [x9, x17, lsl #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 10 10 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitS[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv8b | ld2r { v10.8b, v11.8b }, [x20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8b_POST | ld2r { v18.8b, v19.8b }, [x11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8b_POST | ld2r { v28.8b, v29.8b }, [x30], x14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv16b | ld2r { v10.16b, v11.16b }, [x23] // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv16b_POST | ld2r { v24.16b, v25.16b }, [x1], #2 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv16b_POST | ld2r { v20.16b, v21.16b }, [x11], x7 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv4h | ld2r { v25.4h, v26.4h }, [x11] // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4h_POST | ld2r { v28.4h, v29.4h }, [x18], #4 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4h_POST | ld2r { v21.4h, v22.4h }, [x2], x17 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv8h | ld2r { v23.8h, v24.8h }, [x10] // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8h_POST | ld2r { v19.8h, v20.8h }, [x29], #4 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv8h_POST | ld2r { v13.8h, v14.8h }, [x13], x5 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv2s | ld2r { v25.2s, v26.2s }, [x19] // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2s_POST | ld2r { v5.2s, v6.2s }, [x28], #8 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2s_POST | ld2r { v4.2s, v5.2s }, [x14], x19 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv4s | ld2r { v8.4s, v9.4s }, [x17] // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4s_POST | ld2r { v22.4s, v23.4s }, [x5], #8 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv4s_POST | ld2r { v29.4s, v30.4s }, [x4], x18 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv1d | ld2r { v9.1d, v10.1d }, [x25] // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv1d_POST | ld2r { v15.1d, v16.1d }, [x26], #16 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv1d_POST | ld2r { v10.1d, v11.1d }, [x28], x26 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitV[2] | LD2Rv2d | ld2r { v26.2d, v27.2d }, [x8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2d_POST | ld2r { v14.2d, v15.2d }, [x3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitV[2] | LD2Rv2d_POST | ld2r { v24.2d, v25.2d }, [x6], x14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W_IMM | ld2w { z21.s, z22.s }, p4/z, [x12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W_IMM | ld2w { z29.s, z30.s }, p2/z, [x19, #6, mul vl] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 2 8 8 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | LD2W | ld2w { z18.s, z19.s }, p6/z, [x22, x22, lsl #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 2 9 9 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev8b | ld3 { v8.8b, v9.8b, v10.8b }, [x0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8b_POST | ld3 { v6.8b, v7.8b, v8.8b }, [x26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8b_POST | ld3 { v20.8b, v21.8b, v22.8b }, [x25], x24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev16b | ld3 { v15.16b, v16.16b, v17.16b }, [x5] // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v19.16b, v20.16b, v21.16b }, [x3], #48 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v26.16b, v27.16b, v28.16b }, [x8], x29 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev4h | ld3 { v15.4h, v16.4h, v17.4h }, [x8] // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4h_POST | ld3 { v4.4h, v5.4h, v6.4h }, [x5], #24 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4h_POST | ld3 { v24.4h, v25.4h, v26.4h }, [x25], x0 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev8h | ld3 { v7.8h, v8.8h, v9.8h }, [x21] // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v4.8h, v5.8h, v6.8h }, [x26], #48 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v12.8h, v13.8h, v14.8h }, [x0], x25 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev2s | ld3 { v16.2s, v17.2s, v18.2s }, [x0] // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2s_POST | ld3 { v9.2s, v10.2s, v11.2s }, [x1], #24 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2s_POST | ld3 { v27.2s, v28.2s, v29.2s }, [x23], x4 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev4s | ld3 { v12.4s, v13.4s, v14.4s }, [x25] // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v12.4s, v13.4s, v14.4s }, [x27], #48 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v2.4s, v3.4s, v4.4s }, [x22], x21 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Threev2d | ld3 { v10.2d, v11.2d, v12.2d }, [x18] // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v25.2d, v26.2d, v27.2d }, [x4], #48 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v6.2d, v7.2d, v8.2d }, [x10], x24 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i8 | ld3 { v17.b, v18.b, v19.b }[2], [x27] // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i16 | ld3 { v18.h, v19.h, v20.h }[5], [x16] // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i32 | ld3 { v1.s, v2.s, v3.s }[3], [x14] // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3i64 | ld3 { v5.d, v6.d, v7.d }[1], [x14] // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i8_POST | ld3 { v16.b, v17.b, v18.b }[3], [x15], #3 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i8_POST | ld3 { v14.b, v15.b, v16.b }[4], [x23], x6 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i16_POST | ld3 { v11.h, v12.h, v13.h }[1], [x28], #6 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i16_POST | ld3 { v4.h, v5.h, v6.h }[2], [x5], x15 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i32_POST | ld3 { v26.s, v27.s, v28.s }[0], [x14], #12 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i32_POST | ld3 { v1.s, v2.s, v3.s }[0], [x26], x20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i64_POST | ld3 { v14.d, v15.d, v16.d }[1], [x30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3i64_POST | ld3 { v23.d, v24.d, v25.d }[0], [x24], x14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3B_IMM | ld3b { z29.b - z31.b }, p3/z, [x17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3B_IMM | ld3b { z23.b - z25.b }, p7/z, [x12, #18, mul vl] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3B | ld3b { z23.b - z25.b }, p3/z, [x12, x12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3D_IMM | ld3d { z20.d - z22.d }, p2/z, [x6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3D_IMM | ld3d { z1.d - z3.d }, p2/z, [x9, #-15, mul vl] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3D | ld3d { z13.d - z15.d }, p6/z, [x27, x30, lsl #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3H_IMM | ld3h { z26.h - z28.h }, p1/z, [x29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3H_IMM | ld3h { z14.h - z16.h }, p3/z, [x18, #9, mul vl] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3H | ld3h { z5.h - z7.h }, p3/z, [x6, x21, lsl #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv8b | ld3r { v24.8b, v25.8b, v26.8b }, [x10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8b_POST | ld3r { v14.8b, v15.8b, v16.8b }, [x11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8b_POST | ld3r { v22.8b, v23.8b, v24.8b }, [x0], x11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv16b | ld3r { v17.16b, v18.16b, v19.16b }, [x3] // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv16b_POST | ld3r { v7.16b, v8.16b, v9.16b }, [x29], #3 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv16b_POST | ld3r { v3.16b, v4.16b, v5.16b }, [x20], x5 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv4h | ld3r { v3.4h, v4.4h, v5.4h }, [x1] // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4h_POST | ld3r { v8.4h, v9.4h, v10.4h }, [x3], #6 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4h_POST | ld3r { v4.4h, v5.4h, v6.4h }, [x0], x28 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv8h | ld3r { v6.8h, v7.8h, v8.8h }, [x28] // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8h_POST | ld3r { v4.8h, v5.8h, v6.8h }, [x11], #6 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv8h_POST | ld3r { v3.8h, v4.8h, v5.8h }, [x17], x0 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv2s | ld3r { v18.2s, v19.2s, v20.2s }, [x24] // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2s_POST | ld3r { v8.2s, v9.2s, v10.2s }, [x22], #12 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2s_POST | ld3r { v12.2s, v13.2s, v14.2s }, [x0], x14 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv4s | ld3r { v28.4s, v29.4s, v30.4s }, [x2] // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4s_POST | ld3r { v21.4s, v22.4s, v23.4s }, [x22], #12 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv4s_POST | ld3r { v28.4s, v29.4s, v30.4s }, [x13], x25 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv1d | ld3r { v1.1d, v2.1d, v3.1d }, [x28] // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv1d_POST | ld3r { v0.1d, v1.1d, v2.1d }, [x7], #24 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv1d_POST | ld3r { v22.1d, v23.1d, v24.1d }, [x9], x15 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD3Rv2d | ld3r { v8.2d, v9.2d, v10.2d }, [x3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2d_POST | ld3r { v3.2d, v4.2d, v5.2d }, [x25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD3Rv2d_POST | ld3r { v8.2d, v9.2d, v10.2d }, [x18], x13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3W_IMM | ld3w { z23.s - z25.s }, p1/z, [x8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | LD3W_IMM | ld3w { z6.s - z8.s }, p4/z, [x0, #18, mul vl] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 2 11 11 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 3 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitV[6], V1UnitV01[6] | LD3W | ld3w { z27.s - z29.s }, p3/z, [x3, x6, lsl #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 3 13 13 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitS[6]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Fourv8b | ld4 { v6.8b, v7.8b, v8.8b, v9.8b }, [x27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv8b_POST | ld4 { v20.8b, v21.8b, v22.8b, v23.8b }, [x10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv8b_POST | ld4 { v18.8b, v19.8b, v20.8b, v21.8b }, [x24], x11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv16b | ld4 { v11.16b, v12.16b, v13.16b, v14.16b }, [x5] // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv16b_POST | ld4 { v10.16b, v11.16b, v12.16b, v13.16b }, [x12], #64 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv16b_POST | ld4 { v12.16b, v13.16b, v14.16b, v15.16b }, [x4], x17 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Fourv4h | ld4 { v21.4h, v22.4h, v23.4h, v24.4h }, [x14] // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv4h_POST | ld4 { v10.4h, v11.4h, v12.4h, v13.4h }, [x19], #32 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv4h_POST | ld4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x15], x17 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv8h | ld4 { v9.8h, v10.8h, v11.8h, v12.8h }, [x1] // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv8h_POST | ld4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x0], #64 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv8h_POST | ld4 { v4.8h, v5.8h, v6.8h, v7.8h }, [x17], x17 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Fourv2s | ld4 { v23.2s, v24.2s, v25.2s, v26.2s }, [x24] // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv2s_POST | ld4 { v25.2s, v26.2s, v27.2s, v28.2s }, [x3], #32 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Fourv2s_POST | ld4 { v22.2s, v23.2s, v24.2s, v25.2s }, [x14], x15 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv4s | ld4 { v17.4s, v18.4s, v19.4s, v20.4s }, [x4] // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv4s_POST | ld4 { v25.4s, v26.4s, v27.4s, v28.4s }, [x19], #64 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv4s_POST | ld4 { v4.4s, v5.4s, v6.4s, v7.4s }, [x28], x3 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | LD4Fourv2d | ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x24] // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, D \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv2d_POST | ld4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x0], #64 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitV[6] | LD4Fourv2d_POST | ld4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x27], x4 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i8 | ld4 { v4.b, v5.b, v6.b, v7.b }[12], [x27] // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i16 | ld4 { v5.h, v6.h, v7.h, v8.h }[0], [x4] // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i32 | ld4 { v0.s, v1.s, v2.s, v3.s }[0], [x26] // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4i64 | ld4 { v2.d, v3.d, v4.d, v5.d }[0], [x29] // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i8_POST | ld4 { v26.b, v27.b, v28.b, v29.b }[4], [x13], #4 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i8_POST | ld4 { v10.b, v11.b, v12.b, v13.b }[11], [x24], x21 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i16_POST | ld4 { v8.h, v9.h, v10.h, v11.h }[0], [x17], #8 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i16_POST | ld4 { v21.h, v22.h, v23.h, v24.h }[2], [x21], x24 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i32_POST | ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x28], #16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i32_POST | ld4 { v20.s, v21.s, v22.s, v23.s }[1], [x27], x16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i64_POST | ld4 { v18.d, v19.d, v20.d, v21.d }[1], [x26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4i64_POST | ld4 { v8.d, v9.d, v10.d, v11.d }[0], [x23], x0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4B_IMM | ld4b { z16.b - z19.b }, p3/z, [x23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4B_IMM | ld4b { z7.b - z10.b }, p5/z, [x3, #12, mul vl] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4B | ld4b { z7.b - z10.b }, p4/z, [x20, x12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4D_IMM | ld4d { z26.d - z29.d }, p7/z, [x10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4D_IMM | ld4d { z27.d - z30.d }, p0/z, [x6, #24, mul vl] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4D | ld4d { z7.d - z10.d }, p4/z, [x25, x8, lsl #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4H_IMM | ld4h { z4.h - z7.h }, p4/z, [x19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4H_IMM | ld4h { z4.h - z7.h }, p1/z, [x16, #-8, mul vl] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4H | ld4h { z10.h - z13.h }, p2/z, [x8, x28, lsl #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv8b | ld4r { v20.8b, v21.8b, v22.8b, v23.8b }, [x23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8b_POST | ld4r { v24.8b, v25.8b, v26.8b, v27.8b }, [x15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8b_POST | ld4r { v4.8b, v5.8b, v6.8b, v7.8b }, [x26], x6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv16b | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x25] // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv16b_POST | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x14], #4 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv16b_POST | ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x29], x11 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv4h | ld4r { v16.4h, v17.4h, v18.4h, v19.4h }, [x6] // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4h_POST | ld4r { v14.4h, v15.4h, v16.4h, v17.4h }, [x0], #8 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4h_POST | ld4r { v21.4h, v22.4h, v23.4h, v24.4h }, [x25], x22 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv8h | ld4r { v4.8h, v5.8h, v6.8h, v7.8h }, [x23] // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8h_POST | ld4r { v25.8h, v26.8h, v27.8h, v28.8h }, [x7], #8 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv8h_POST | ld4r { v13.8h, v14.8h, v15.8h, v16.8h }, [x19], x27 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv2s | ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x30] // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2s_POST | ld4r { v23.2s, v24.2s, v25.2s, v26.2s }, [x29], #16 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2s_POST | ld4r { v19.2s, v20.2s, v21.2s, v22.2s }, [x9], x0 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv4s | ld4r { v7.4s, v8.4s, v9.4s, v10.4s }, [x23] // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4s_POST | ld4r { v9.4s, v10.4s, v11.4s, v12.4s }, [x3], #16 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv4s_POST | ld4r { v3.4s, v4.4s, v5.4s, v6.4s }, [x10], x22 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv1d | ld4r { v7.1d, v8.1d, v9.1d, v10.1d }, [x26] // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv1d_POST | ld4r { v11.1d, v12.1d, v13.1d, v14.1d }, [x5], #32 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv1d_POST | ld4r { v12.1d, v13.1d, v14.1d, v15.1d }, [x30], x17 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitV[3] | LD4Rv2d | ld4r { v7.2d, v8.2d, v9.2d, v10.2d }, [x8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2d_POST | ld4r { v12.2d, v13.2d, v14.2d, v15.2d }, [x2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitV[3] | LD4Rv2d_POST | ld4r { v17.2d, v18.2d, v19.2d, v20.2d }, [x21], x13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4W_IMM | ld4w { z18.s - z21.s }, p6/z, [x4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 2 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | LD4W_IMM | ld4w { z21.s - z24.s }, p5/z, [x16, #-8, mul vl] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 2 12 12 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 3 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitV[8], V1UnitV01[8] | LD4W | ld4w { z25.s - z28.s }, p2/z, [x23, x8, lsl #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 3 13 13 0.25 V1UnitL01[8],V1UnitV01[8],V1UnitS[8]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SW_D_SXTW_SCALED | ld1sw { z22.d }, p1/z, [x14, z23.d, sxtw #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SW_D_SXTW | ld1sw { z4.d }, p3/z, [x20, z15.d, sxtw] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1SW_D_SCALED | ld1sw { z1.d }, p4/z, [x20, z23.d, lsl #2] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SW_D | ld1sw { z2.d }, p7/z, [x4, z0.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SW_D_IMM | ld1sw { z12.d }, p7/z, [z21.d] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1SW_D_IMM | ld1sw { z27.d }, p3/z, [z10.d, #24] // LD1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1W_SXTW_SCALED | ld1w { z9.s }, p0/z, [x18, z9.s, sxtw #2] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1W_D_UXTW_SCALED | ld1w { z14.d }, p5/z, [x26, z2.d, uxtw #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1W_D_UXTW | ld1w { z31.d }, p6/z, [x17, z2.d, uxtw] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1W_SXTW | ld1w { z14.s }, p2/z, [x18, z28.s, sxtw] // LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1W_D_SCALED | ld1w { z13.d }, p3/z, [x5, z11.d, lsl #2] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1W_D | ld1w { z24.d }, p3/z, [x2, z17.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1W_IMM | ld1w { z4.s }, p0/z, [z1.s] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLD1W_IMM | ld1w { z17.s }, p6/z, [z26.s, #60] // LD1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1W_D_IMM | ld1w { z31.d }, p7/z, [z22.d] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLD1W_D_IMM | ld1w { z2.d }, p3/z, [z6.d, #116] // LD1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov8b | ld2 { v13.8b, v14.8b }, [x4] // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov8b_POST | ld2 { v20.8b, v21.8b }, [x11], #16 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov8b_POST | ld2 { v13.8b, v14.8b }, [x4], x7 // LD2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov16b | ld2 { v26.16b, v27.16b }, [x16] // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v15.16b, v16.16b }, [x3], #32 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov16b_POST | ld2 { v24.16b, v25.16b }, [x7], x30 // LD2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov4h | ld2 { v0.4h, v1.4h }, [x21] // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x30], #16 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov4h_POST | ld2 { v5.4h, v6.4h }, [x22], x1 // LD2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov8h | ld2 { v8.8h, v9.8h }, [x28] // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v14.8h, v15.8h }, [x19], #32 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov8h_POST | ld2 { v28.8h, v29.8h }, [x26], x7 // LD2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 3.00 | V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov2s | ld2 { v2.2s, v3.2s }, [x16] // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 2 8 8 3.0 V1UnitL,V1UnitV
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov2s_POST | ld2 { v23.2s, v24.2s }, [x5], #16 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 3.00 | V1UnitI, V1UnitL, V1UnitSVE01, V1UnitV | LD2Twov2s_POST | ld2 { v22.2s, v23.2s }, [x11], x12 // LD2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, D-form, B/H/S \\ 3 8 8 3.0 V1UnitL,V1UnitV,V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov4s | ld2 { v22.4s, v23.4s }, [x4] // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v27.4s, v28.4s }, [x18], #32 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov4s_POST | ld2 { v22.4s, v23.4s }, [x26], x29 // LD2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov2d | ld2 { v22.2d, v23.2d }, [x17] // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, multiple, Q-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v12.2d, v13.2d }, [x19], #32 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Twov2d_POST | ld2 { v6.2d, v7.2d }, [x11], x24 // LD2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, multiple, Q-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i8 | ld2 { v29.b, v30.b }[3], [x1] // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i16 | ld2 { v23.h, v24.h }[7], [x14] // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, B/H \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i32 | ld2 { v26.s, v27.s }[1], [x17] // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i64 | ld2 { v1.d, v2.d }[0], [x10] // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 2 element, one lane, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i8_POST | ld2 { v20.b, v21.b }[9], [x24], #2 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i8_POST | ld2 { v29.b, v30.b }[6], [x18], x19 // LD2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i16_POST | ld2 { v2.h, v3.h }[3], [x12], #4 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i16_POST | ld2 { v11.h, v12.h }[3], [x18], x17 // LD2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, B/H \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i32_POST | ld2 { v15.s, v16.s }[1], [x7], #8 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i32_POST | ld2 { v29.s, v30.s }[1], [x12], x0 // LD2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i64_POST | ld2 { v1.d, v2.d }[1], [x3], #16 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2i64_POST | ld2 { v10.d, v11.d }[1], [x18], x27 // LD2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, one lane, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2B_IMM | ld2b { z9.b, z10.b }, p2/z, [x22] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2B_IMM | ld2b { z28.b, z29.b }, p3/z, [x22, #4, mul vl] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2B | ld2b { z26.b, z27.b }, p1/z, [x3, x12] // LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 9 9 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2D_IMM | ld2d { z12.d, z13.d }, p5/z, [x24] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2D_IMM | ld2d { z22.d, z23.d }, p2/z, [x21, #-2, mul vl] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2D | ld2d { z22.d, z23.d }, p6/z, [x14, x4, lsl #3] // LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 9 9 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2H_IMM | ld2h { z5.h, z6.h }, p5/z, [x20] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2H_IMM | ld2h { z27.h, z28.h }, p7/z, [x11, #14, mul vl] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 4 | 10 | 10 | 1.00 | V1UnitI[2], V1UnitL[2], V1UnitL01[2], V1UnitS[2], V1UnitSVE01[2], V1UnitV[2] | LD2H | ld2h { z18.h, z19.h }, p3/z, [x9, x17, lsl #1] // LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 4 10 10 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2],V1UnitS[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv8b | ld2r { v10.8b, v11.8b }, [x20] // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv8b_POST | ld2r { v18.8b, v19.8b }, [x11], #2 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv8b_POST | ld2r { v28.8b, v29.8b }, [x30], x14 // LD2R { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv16b | ld2r { v10.16b, v11.16b }, [x23] // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv16b_POST | ld2r { v24.16b, v25.16b }, [x1], #2 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #2 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv16b_POST | ld2r { v20.16b, v21.16b }, [x11], x7 // LD2R { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv4h | ld2r { v25.4h, v26.4h }, [x11] // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv4h_POST | ld2r { v28.4h, v29.4h }, [x18], #4 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv4h_POST | ld2r { v21.4h, v22.4h }, [x2], x17 // LD2R { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv8h | ld2r { v23.8h, v24.8h }, [x10] // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv8h_POST | ld2r { v19.8h, v20.8h }, [x29], #4 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #4 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv8h_POST | ld2r { v13.8h, v14.8h }, [x13], x5 // LD2R { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv2s | ld2r { v25.2s, v26.2s }, [x19] // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv2s_POST | ld2r { v5.2s, v6.2s }, [x28], #8 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv2s_POST | ld2r { v4.2s, v5.2s }, [x14], x19 // LD2R { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, B/H/S \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv4s | ld2r { v8.4s, v9.4s }, [x17] // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv4s_POST | ld2r { v22.4s, v23.4s }, [x5], #8 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #8 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv4s_POST | ld2r { v29.4s, v30.4s }, [x4], x18 // LD2R { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv1d | ld2r { v9.1d, v10.1d }, [x25] // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, D-form, D \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv1d_POST | ld2r { v15.1d, v16.1d }, [x26], #16 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv1d_POST | ld2r { v10.1d, v11.1d }, [x28], x26 // LD2R { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, D-form, D \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.50 | V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv2d | ld2r { v26.2d, v27.2d }, [x8] // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD load, 2 element, all lanes, Q-form \\ 2 8 8 1.5 V1UnitL[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv2d_POST | ld2r { v14.2d, v15.2d }, [x3], #16 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #16 \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.50 | V1UnitI, V1UnitL[2], V1UnitSVE01[2], V1UnitV[2] | LD2Rv2d_POST | ld2r { v24.2d, v25.2d }, [x6], x14 // LD2R { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 2 element, all lanes, Q-form \\ 3 8 8 1.5 V1UnitL[2],V1UnitV[2],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2W_IMM | ld2w { z21.s, z22.s }, p4/z, [x12] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2W_IMM | ld2w { z29.s, z30.s }, p2/z, [x19, #6, mul vl] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load two structures to two vectors, scalar + imm \\ 3 8 8 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 9 | 9 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | LD2W | ld2w { z18.s, z19.s }, p6/z, [x22, x22, lsl #2] // LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load two structures to two vectors, scalar + scalar \\ 3 9 9 1.0 V1UnitSVE01[2],V1UnitSVE01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev8b | ld3 { v8.8b, v9.8b, v10.8b }, [x0] // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev8b_POST | ld3 { v6.8b, v7.8b, v8.8b }, [x26], #24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev8b_POST | ld3 { v20.8b, v21.8b, v22.8b }, [x25], x24 // LD3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev16b | ld3 { v15.16b, v16.16b, v17.16b }, [x5] // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v19.16b, v20.16b, v21.16b }, [x3], #48 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev16b_POST | ld3 { v26.16b, v27.16b, v28.16b }, [x8], x29 // LD3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev4h | ld3 { v15.4h, v16.4h, v17.4h }, [x8] // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev4h_POST | ld3 { v4.4h, v5.4h, v6.4h }, [x5], #24 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev4h_POST | ld3 { v24.4h, v25.4h, v26.4h }, [x25], x0 // LD3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev8h | ld3 { v7.8h, v8.8h, v9.8h }, [x21] // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v4.8h, v5.8h, v6.8h }, [x26], #48 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev8h_POST | ld3 { v12.8h, v13.8h, v14.8h }, [x0], x25 // LD3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev2s | ld3 { v16.2s, v17.2s, v18.2s }, [x0] // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev2s_POST | ld3 { v9.2s, v10.2s, v11.2s }, [x1], #24 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev2s_POST | ld3 { v27.2s, v28.2s, v29.2s }, [x23], x4 // LD3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev4s | ld3 { v12.4s, v13.4s, v14.4s }, [x25] // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v12.4s, v13.4s, v14.4s }, [x27], #48 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev4s_POST | ld3 { v2.4s, v3.4s, v4.4s }, [x22], x21 // LD3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev2d | ld3 { v10.2d, v11.2d, v12.2d }, [x18] // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, multiple, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v25.2d, v26.2d, v27.2d }, [x4], #48 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Threev2d_POST | ld3 { v6.2d, v7.2d, v8.2d }, [x10], x24 // LD3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, multiple, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i8 | ld3 { v17.b, v18.b, v19.b }[2], [x27] // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i16 | ld3 { v18.h, v19.h, v20.h }[5], [x16] // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i32 | ld3 { v1.s, v2.s, v3.s }[3], [x14] // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i64 | ld3 { v5.d, v6.d, v7.d }[1], [x14] // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 3 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i8_POST | ld3 { v16.b, v17.b, v18.b }[3], [x15], #3 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i8_POST | ld3 { v14.b, v15.b, v16.b }[4], [x23], x6 // LD3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i16_POST | ld3 { v11.h, v12.h, v13.h }[1], [x28], #6 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i16_POST | ld3 { v4.h, v5.h, v6.h }[2], [x5], x15 // LD3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i32_POST | ld3 { v26.s, v27.s, v28.s }[0], [x14], #12 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i32_POST | ld3 { v1.s, v2.s, v3.s }[0], [x26], x20 // LD3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i64_POST | ld3 { v14.d, v15.d, v16.d }[1], [x30], #24 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3i64_POST | ld3 { v23.d, v24.d, v25.d }[0], [x24], x14 // LD3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3B_IMM | ld3b { z29.b - z31.b }, p3/z, [x17] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3B_IMM | ld3b { z23.b - z25.b }, p7/z, [x12, #18, mul vl] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 4 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitSVE01[6], V1UnitV[6] | LD3B | ld3b { z23.b - z25.b }, p3/z, [x12, x12] // LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3D_IMM | ld3d { z20.d - z22.d }, p2/z, [x6] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3D_IMM | ld3d { z1.d - z3.d }, p2/z, [x9, #-15, mul vl] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 4 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitSVE01[6], V1UnitV[6] | LD3D | ld3d { z13.d - z15.d }, p6/z, [x27, x30, lsl #3] // LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3H_IMM | ld3h { z26.h - z28.h }, p1/z, [x29] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3H_IMM | ld3h { z14.h - z16.h }, p3/z, [x18, #9, mul vl] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 4 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitSVE01[6], V1UnitV[6] | LD3H | ld3h { z5.h - z7.h }, p3/z, [x6, x21, lsl #1] // LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv8b | ld3r { v24.8b, v25.8b, v26.8b }, [x10] // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv8b_POST | ld3r { v14.8b, v15.8b, v16.8b }, [x11], #3 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv8b_POST | ld3r { v22.8b, v23.8b, v24.8b }, [x0], x11 // LD3R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv16b | ld3r { v17.16b, v18.16b, v19.16b }, [x3] // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv16b_POST | ld3r { v7.16b, v8.16b, v9.16b }, [x29], #3 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #3 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv16b_POST | ld3r { v3.16b, v4.16b, v5.16b }, [x20], x5 // LD3R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv4h | ld3r { v3.4h, v4.4h, v5.4h }, [x1] // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv4h_POST | ld3r { v8.4h, v9.4h, v10.4h }, [x3], #6 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv4h_POST | ld3r { v4.4h, v5.4h, v6.4h }, [x0], x28 // LD3R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv8h | ld3r { v6.8h, v7.8h, v8.8h }, [x28] // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv8h_POST | ld3r { v4.8h, v5.8h, v6.8h }, [x11], #6 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #6 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv8h_POST | ld3r { v3.8h, v4.8h, v5.8h }, [x17], x0 // LD3R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv2s | ld3r { v18.2s, v19.2s, v20.2s }, [x24] // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv2s_POST | ld3r { v8.2s, v9.2s, v10.2s }, [x22], #12 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv2s_POST | ld3r { v12.2s, v13.2s, v14.2s }, [x0], x14 // LD3R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv4s | ld3r { v28.4s, v29.4s, v30.4s }, [x2] // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv4s_POST | ld3r { v21.4s, v22.4s, v23.4s }, [x22], #12 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #12 \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv4s_POST | ld3r { v28.4s, v29.4s, v30.4s }, [x13], x25 // LD3R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv1d | ld3r { v1.1d, v2.1d, v3.1d }, [x28] // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv1d_POST | ld3r { v0.1d, v1.1d, v2.1d }, [x7], #24 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv1d_POST | ld3r { v22.1d, v23.1d, v24.1d }, [x9], x15 // LD3R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv2d | ld3r { v8.2d, v9.2d, v10.2d }, [x3] // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv2d_POST | ld3r { v3.2d, v4.2d, v5.2d }, [x25], #24 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #24 \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD3Rv2d_POST | ld3r { v8.2d, v9.2d, v10.2d }, [x18], x13 // LD3R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 3 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3W_IMM | ld3w { z23.s - z25.s }, p1/z, [x8] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 3 | 11 | 11 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6] | LD3W_IMM | ld3w { z6.s - z8.s }, p4/z, [x0, #18, mul vl] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load three structures to three vectors, scalar + imm \\ 3 11 11 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6]
+# CHECK-NEXT: 4 | 13 | 13 | 0.33 | V1UnitI[6], V1UnitL[6], V1UnitL01[6], V1UnitS[6], V1UnitSVE01[6], V1UnitV[6] | LD3W | ld3w { z27.s - z29.s }, p3/z, [x3, x6, lsl #2] // LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load three structures to three vectors, scalar + scalar \\ 4 13 13 0.33 V1UnitSVE01[6],V1UnitSVE01[6],V1UnitL01[6],V1UnitS[6]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv8b | ld4 { v6.8b, v7.8b, v8.8b, v9.8b }, [x27] // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv8b_POST | ld4 { v20.8b, v21.8b, v22.8b, v23.8b }, [x10], #32 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv8b_POST | ld4 { v18.8b, v19.8b, v20.8b, v21.8b }, [x24], x11 // LD4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv16b | ld4 { v11.16b, v12.16b, v13.16b, v14.16b }, [x5] // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv16b_POST | ld4 { v10.16b, v11.16b, v12.16b, v13.16b }, [x12], #64 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv16b_POST | ld4 { v12.16b, v13.16b, v14.16b, v15.16b }, [x4], x17 // LD4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv4h | ld4 { v21.4h, v22.4h, v23.4h, v24.4h }, [x14] // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv4h_POST | ld4 { v10.4h, v11.4h, v12.4h, v13.4h }, [x19], #32 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv4h_POST | ld4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x15], x17 // LD4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv8h | ld4 { v9.8h, v10.8h, v11.8h, v12.8h }, [x1] // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv8h_POST | ld4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x0], #64 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv8h_POST | ld4 { v4.8h, v5.8h, v6.8h, v7.8h }, [x17], x17 // LD4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv2s | ld4 { v23.2s, v24.2s, v25.2s, v26.2s }, [x24] // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv2s_POST | ld4 { v25.2s, v26.2s, v27.2s, v28.2s }, [x3], #32 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Fourv2s_POST | ld4 { v22.2s, v23.2s, v24.2s, v25.2s }, [x14], x15 // LD4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv4s | ld4 { v17.4s, v18.4s, v19.4s, v20.4s }, [x4] // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv4s_POST | ld4 { v25.4s, v26.4s, v27.4s, v28.4s }, [x19], #64 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv4s_POST | ld4 { v4.4s, v5.4s, v6.4s, v7.4s }, [x28], x3 // LD4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, B/H/S \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv2d | ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x24] // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, multiple, Q-form, D \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv2d_POST | ld4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x0], #64 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 3 | 9 | 9 | 0.50 | V1UnitI, V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | LD4Fourv2d_POST | ld4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x27], x4 // LD4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, multiple, Q-form, D \\ 3 9 9 0.5 V1UnitL[6],V1UnitV[6],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i8 | ld4 { v4.b, v5.b, v6.b, v7.b }[12], [x27] // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i16 | ld4 { v5.h, v6.h, v7.h, v8.h }[0], [x4] // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, B/H \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i32 | ld4 { v0.s, v1.s, v2.s, v3.s }[0], [x26] // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i64 | ld4 { v2.d, v3.d, v4.d, v5.d }[0], [x29] // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD load, 4 element, one lane, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i8_POST | ld4 { v26.b, v27.b, v28.b, v29.b }[4], [x13], #4 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i8_POST | ld4 { v10.b, v11.b, v12.b, v13.b }[11], [x24], x21 // LD4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i16_POST | ld4 { v8.h, v9.h, v10.h, v11.h }[0], [x17], #8 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i16_POST | ld4 { v21.h, v22.h, v23.h, v24.h }[2], [x21], x24 // LD4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, B/H \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i32_POST | ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x28], #16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i32_POST | ld4 { v20.s, v21.s, v22.s, v23.s }[1], [x27], x16 // LD4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i64_POST | ld4 { v18.d, v19.d, v20.d, v21.d }[1], [x26], #32 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4i64_POST | ld4 { v8.d, v9.d, v10.d, v11.d }[0], [x23], x0 // LD4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, one lane, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4B_IMM | ld4b { z16.b - z19.b }, p3/z, [x23] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4B_IMM | ld4b { z7.b - z10.b }, p5/z, [x3, #12, mul vl] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 4 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitSVE01[8], V1UnitV[8] | LD4B | ld4b { z7.b - z10.b }, p4/z, [x20, x12] // LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4D_IMM | ld4d { z26.d - z29.d }, p7/z, [x10] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4D_IMM | ld4d { z27.d - z30.d }, p0/z, [x6, #24, mul vl] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 4 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitSVE01[8], V1UnitV[8] | LD4D | ld4d { z7.d - z10.d }, p4/z, [x25, x8, lsl #3] // LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4H_IMM | ld4h { z4.h - z7.h }, p4/z, [x19] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4H_IMM | ld4h { z4.h - z7.h }, p1/z, [x16, #-8, mul vl] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 4 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitSVE01[8], V1UnitV[8] | LD4H | ld4h { z10.h - z13.h }, p2/z, [x8, x28, lsl #1] // LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv8b | ld4r { v20.8b, v21.8b, v22.8b, v23.8b }, [x23] // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv8b_POST | ld4r { v24.8b, v25.8b, v26.8b, v27.8b }, [x15], #4 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv8b_POST | ld4r { v4.8b, v5.8b, v6.8b, v7.8b }, [x26], x6 // LD4R { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv16b | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x25] // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv16b_POST | ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x14], #4 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #4 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv16b_POST | ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x29], x11 // LD4R { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv4h | ld4r { v16.4h, v17.4h, v18.4h, v19.4h }, [x6] // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv4h_POST | ld4r { v14.4h, v15.4h, v16.4h, v17.4h }, [x0], #8 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv4h_POST | ld4r { v21.4h, v22.4h, v23.4h, v24.4h }, [x25], x22 // LD4R { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv8h | ld4r { v4.8h, v5.8h, v6.8h, v7.8h }, [x23] // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv8h_POST | ld4r { v25.8h, v26.8h, v27.8h, v28.8h }, [x7], #8 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #8 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv8h_POST | ld4r { v13.8h, v14.8h, v15.8h, v16.8h }, [x19], x27 // LD4R { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv2s | ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [x30] // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv2s_POST | ld4r { v23.2s, v24.2s, v25.2s, v26.2s }, [x29], #16 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv2s_POST | ld4r { v19.2s, v20.2s, v21.2s, v22.2s }, [x9], x0 // LD4R { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv4s | ld4r { v7.4s, v8.4s, v9.4s, v10.4s }, [x23] // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv4s_POST | ld4r { v9.4s, v10.4s, v11.4s, v12.4s }, [x3], #16 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #16 \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv4s_POST | ld4r { v3.4s, v4.4s, v5.4s, v6.4s }, [x10], x22 // LD4R { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, B/H/S \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv1d | ld4r { v7.1d, v8.1d, v9.1d, v10.1d }, [x26] // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, D-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv1d_POST | ld4r { v11.1d, v12.1d, v13.1d, v14.1d }, [x5], #32 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv1d_POST | ld4r { v12.1d, v13.1d, v14.1d, v15.1d }, [x30], x17 // LD4R { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, D-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 2 | 8 | 8 | 1.00 | V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv2d | ld4r { v7.2d, v8.2d, v9.2d, v10.2d }, [x8] // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 2 8 8 1.0 V1UnitL[3],V1UnitV[3]
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv2d_POST | ld4r { v12.2d, v13.2d, v14.2d, v15.2d }, [x2], #32 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #32 \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 8 | 8 | 1.00 | V1UnitI, V1UnitL[3], V1UnitSVE01[3], V1UnitV[3] | LD4Rv2d_POST | ld4r { v17.2d, v18.2d, v19.2d, v20.2d }, [x21], x13 // LD4R { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD load, 4 element, all lanes, Q-form, D \\ 3 8 8 1.0 V1UnitL[3],V1UnitV[3],V1UnitI
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4W_IMM | ld4w { z18.s - z21.s }, p6/z, [x4] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 12 | 12 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | LD4W_IMM | ld4w { z21.s - z24.s }, p5/z, [x16, #-8, mul vl] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous Load four structures to four vectors, scalar + imm \\ 3 12 12 0.25 V1UnitSVE01[8],V1UnitSVE01[8],V1UnitL01[8]
+# CHECK-NEXT: 4 | 13 | 13 | 0.25 | V1UnitI[8], V1UnitL[8], V1UnitL01[8], V1UnitS[8], V1UnitSVE01[8], V1UnitV[8] | LD4W | ld4w { z25.s - z28.s }, p2/z, [x23, x8, lsl #2] // LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous Load four structures to four vectors, scalar + scalar \\ 4 13 13 0.25 V1UnitL01[8],V1UnitSVE01[8],V1UnitSVE01[8],V1UnitS[8]
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURi | ldapur w7, [x24] // LDAPUR <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURi | ldapur w25, [x29, #68] // LDAPUR <Wt>, [<Xn|SP>, #<simm>] \\ No description \\ No scheduling info
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | LDAPURXi | ldapur x20, [x13] // LDAPUR <Xt>, [<Xn|SP>] \\ No description \\ No scheduling info
@@ -5510,86 +5513,86 @@ test:
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_S | ldff1b { z17.s }, p7/z, [x11, x15] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_D | ldff1b { z9.d }, p2/z, [x3] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1B_D | ldff1b { z5.d }, p2/z, [x6, x8] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D_SXTW | ldff1b { z7.d }, p3/z, [x27, z19.d, sxtw] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_S_SXTW | ldff1b { z13.s }, p3/z, [x24, z25.s, sxtw] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D | ldff1b { z27.d }, p0/z, [x13, z16.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1B_S_IMM | ldff1b { z7.s }, p7/z, [z16.s] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1B_S_IMM | ldff1b { z11.s }, p5/z, [z8.s, #25] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D_IMM | ldff1b { z2.d }, p7/z, [z19.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1B_D_IMM | ldff1b { z3.d }, p5/z, [z0.d, #11] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1B_D_SXTW | ldff1b { z7.d }, p3/z, [x27, z19.d, sxtw] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1B_S_SXTW | ldff1b { z13.s }, p3/z, [x24, z25.s, sxtw] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1B_D | ldff1b { z27.d }, p0/z, [x13, z16.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1B_S_IMM | ldff1b { z7.s }, p7/z, [z16.s] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1B_S_IMM | ldff1b { z11.s }, p5/z, [z8.s, #25] // LDFF1B { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1B_D_IMM | ldff1b { z2.d }, p7/z, [z19.d] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1B_D_IMM | ldff1b { z3.d }, p5/z, [z0.d, #11] // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1D | ldff1d { z21.d }, p2/z, [x20] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1D | ldff1d { z9.d }, p3/z, [x28, x30, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1D_SXTW_SCALED | ldff1d { z21.d }, p4/z, [x11, z12.d, sxtw #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D_UXTW | ldff1d { z6.d }, p4/z, [x15, z1.d, uxtw] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1D_SCALED | ldff1d { z12.d }, p7/z, [x11, z28.d, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D | ldff1d { z26.d }, p4/z, [x30, z5.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D_IMM | ldff1d { z10.d }, p5/z, [z10.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1D_IMM | ldff1d { z21.d }, p6/z, [z3.d, #48] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1D_SXTW_SCALED | ldff1d { z21.d }, p4/z, [x11, z12.d, sxtw #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1D_UXTW | ldff1d { z6.d }, p4/z, [x15, z1.d, uxtw] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1D_SCALED | ldff1d { z12.d }, p7/z, [x11, z28.d, lsl #3] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1D | ldff1d { z26.d }, p4/z, [x30, z5.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1D_IMM | ldff1d { z10.d }, p5/z, [z10.d] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1D_IMM | ldff1d { z21.d }, p6/z, [z3.d, #48] // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H | ldff1h { z14.h }, p3/z, [x22] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H | ldff1h { z15.h }, p2/z, [x24, x8, lsl #1] // LDFF1H { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_S | ldff1h { z23.s }, p0/z, [x12] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_S | ldff1h { z18.s }, p0/z, [x7, x25, lsl #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_D | ldff1h { z16.d }, p0/z, [x11] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1H_D | ldff1h { z25.d }, p3/z, [x24, x19, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_S_SXTW_SCALED | ldff1h { z9.s }, p2/z, [x3, z24.s, sxtw #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_D_UXTW_SCALED | ldff1h { z7.d }, p0/z, [x8, z17.d, uxtw #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D_SXTW | ldff1h { z9.d }, p5/z, [x4, z10.d, sxtw] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_S_UXTW | ldff1h { z4.s }, p4/z, [x6, z27.s, uxtw] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_D_SCALED | ldff1h { z25.d }, p1/z, [x29, z6.d, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D | ldff1h { z10.d }, p7/z, [x1, z26.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_S_IMM | ldff1h { z4.s }, p1/z, [z27.s] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1H_S_IMM | ldff1h { z5.s }, p3/z, [z8.s, #62] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D_IMM | ldff1h { z16.d }, p5/z, [z10.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1H_D_IMM | ldff1h { z15.d }, p2/z, [z19.d, #34] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1H_S_SXTW_SCALED | ldff1h { z9.s }, p2/z, [x3, z24.s, sxtw #1] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1H_D_UXTW_SCALED | ldff1h { z7.d }, p0/z, [x8, z17.d, uxtw #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1H_D_SXTW | ldff1h { z9.d }, p5/z, [x4, z10.d, sxtw] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1H_S_UXTW | ldff1h { z4.s }, p4/z, [x6, z27.s, uxtw] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1H_D_SCALED | ldff1h { z25.d }, p1/z, [x29, z6.d, lsl #1] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1H_D | ldff1h { z10.d }, p7/z, [x1, z26.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1H_S_IMM | ldff1h { z4.s }, p1/z, [z27.s] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1H_S_IMM | ldff1h { z5.s }, p3/z, [z8.s, #62] // LDFF1H { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1H_D_IMM | ldff1h { z16.d }, p5/z, [z10.d] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1H_D_IMM | ldff1h { z15.d }, p2/z, [z19.d, #34] // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_H | ldff1sb { z0.h }, p2/z, [x2] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_H | ldff1sb { z29.h }, p1/z, [x16, x21] // LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_S | ldff1sb { z20.s }, p7/z, [x8] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_S | ldff1sb { z8.s }, p2/z, [x4, x14] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_D | ldff1sb { z11.d }, p4/z, [x6] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SB_D | ldff1sb { z17.d }, p4/z, [x16, x10] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D_SXTW | ldff1sb { z13.d }, p2/z, [x28, z8.d, sxtw] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_S_SXTW | ldff1sb { z3.s }, p2/z, [x26, z24.s, sxtw] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D | ldff1sb { z10.d }, p7/z, [x20, z6.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SB_S_IMM | ldff1sb { z18.s }, p3/z, [z9.s] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SB_S_IMM | ldff1sb { z25.s }, p2/z, [z29.s, #25] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D_IMM | ldff1sb { z8.d }, p0/z, [z24.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SB_D_IMM | ldff1sb { z7.d }, p0/z, [z4.d, #9] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SB_D_SXTW | ldff1sb { z13.d }, p2/z, [x28, z8.d, sxtw] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SB_S_SXTW | ldff1sb { z3.s }, p2/z, [x26, z24.s, sxtw] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SB_D | ldff1sb { z10.d }, p7/z, [x20, z6.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SB_S_IMM | ldff1sb { z18.s }, p3/z, [z9.s] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SB_S_IMM | ldff1sb { z25.s }, p2/z, [z29.s, #25] // LDFF1SB { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SB_D_IMM | ldff1sb { z8.d }, p0/z, [z24.d] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SB_D_IMM | ldff1sb { z7.d }, p0/z, [z4.d, #9] // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_S | ldff1sh { z2.s }, p2/z, [x6] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_S | ldff1sh { z9.s }, p3/z, [x30, x16, lsl #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_D | ldff1sh { z7.d }, p4/z, [x30] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 7 | 7 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SH_D | ldff1sh { z1.d }, p0/z, [x29, x0, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous first faulting load, scalar + scalar \\ 2 7 7 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_S_SXTW_SCALED | ldff1sh { z25.s }, p4/z, [x5, z9.s, sxtw #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_D_SXTW_SCALED | ldff1sh { z17.d }, p3/z, [x0, z25.d, sxtw #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D_SXTW | ldff1sh { z12.d }, p7/z, [x5, z15.d, sxtw] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_S_UXTW | ldff1sh { z8.s }, p5/z, [x3, z21.s, uxtw] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_D_SCALED | ldff1sh { z14.d }, p6/z, [x17, z27.d, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D | ldff1sh { z23.d }, p4/z, [x22, z0.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_S_IMM | ldff1sh { z6.s }, p4/z, [z6.s] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SH_S_IMM | ldff1sh { z3.s }, p7/z, [z26.s, #16] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D_IMM | ldff1sh { z25.d }, p3/z, [z17.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SH_D_IMM | ldff1sh { z2.d }, p3/z, [z31.d, #26] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SH_S_SXTW_SCALED | ldff1sh { z25.s }, p4/z, [x5, z9.s, sxtw #1] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SH_D_SXTW_SCALED | ldff1sh { z17.d }, p3/z, [x0, z25.d, sxtw #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SH_D_SXTW | ldff1sh { z12.d }, p7/z, [x5, z15.d, sxtw] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SH_S_UXTW | ldff1sh { z8.s }, p5/z, [x3, z21.s, uxtw] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SH_D_SCALED | ldff1sh { z14.d }, p6/z, [x17, z27.d, lsl #1] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SH_D | ldff1sh { z23.d }, p4/z, [x22, z0.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SH_S_IMM | ldff1sh { z6.s }, p4/z, [z6.s] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SH_S_IMM | ldff1sh { z3.s }, p7/z, [z26.s, #16] // LDFF1SH { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SH_D_IMM | ldff1sh { z25.d }, p3/z, [z17.d] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SH_D_IMM | ldff1sh { z2.d }, p3/z, [z31.d, #26] // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SW_D | ldff1sw { z16.d }, p2/z, [x8] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1SW_D | ldff1sw { z27.d }, p1/z, [x6, x11, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SW_D_UXTW_SCALED | ldff1sw { z27.d }, p3/z, [x5, z20.d, uxtw #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D_SXTW | ldff1sw { z15.d }, p1/z, [x13, z26.d, sxtw] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1SW_D_SCALED | ldff1sw { z24.d }, p2/z, [x7, z23.d, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D | ldff1sw { z8.d }, p3/z, [x5, z22.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D_IMM | ldff1sw { z16.d }, p6/z, [z12.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1SW_D_IMM | ldff1sw { z3.d }, p1/z, [z13.d, #60] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SW_D_UXTW_SCALED | ldff1sw { z27.d }, p3/z, [x5, z20.d, uxtw #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SW_D_SXTW | ldff1sw { z15.d }, p1/z, [x13, z26.d, sxtw] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1SW_D_SCALED | ldff1sw { z24.d }, p2/z, [x7, z23.d, lsl #2] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SW_D | ldff1sw { z8.d }, p3/z, [x5, z22.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SW_D_IMM | ldff1sw { z16.d }, p6/z, [z12.d] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1SW_D_IMM | ldff1sw { z3.d }, p1/z, [z13.d, #60] // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W | ldff1w { z2.s }, p5/z, [x13] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W | ldff1w { z9.s }, p3/z, [x16, x19, lsl #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W_D | ldff1w { z31.d }, p6/z, [x3] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
# CHECK-NEXT: 2 | 6 | 6 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS | LDFF1W_D | ldff1w { z30.d }, p4/z, [x25, x12, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous first faulting load, scalar + scalar \\ 2 6 6 2.0 V1UnitL01,V1UnitS
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_UXTW_SCALED | ldff1w { z27.s }, p6/z, [x10, z17.s, uxtw #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_D_SXTW_SCALED | ldff1w { z8.d }, p4/z, [x28, z31.d, sxtw #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D_UXTW | ldff1w { z1.d }, p0/z, [x23, z14.d, uxtw] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_UXTW | ldff1w { z17.s }, p5/z, [x8, z6.s, uxtw] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_D_SCALED | ldff1w { z19.d }, p3/z, [x7, z18.d, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D | ldff1w { z23.d }, p2/z, [x16, z4.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_IMM | ldff1w { z24.s }, p6/z, [z24.s] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitV[12] | GLDFF1W_IMM | ldff1w { z20.s }, p0/z, [z6.s, #36] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D_IMM | ldff1w { z21.d }, p5/z, [z12.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
-# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitV[6] | GLDFF1W_D_IMM | ldff1w { z29.d }, p2/z, [z11.d, #40] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1W_UXTW_SCALED | ldff1w { z27.s }, p6/z, [x10, z17.s, uxtw #2] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1W_D_SXTW_SCALED | ldff1w { z8.d }, p4/z, [x28, z31.d, sxtw #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1W_D_UXTW | ldff1w { z1.d }, p0/z, [x23, z14.d, uxtw] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1W_UXTW | ldff1w { z17.s }, p5/z, [x8, z6.s, uxtw] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1W_D_SCALED | ldff1w { z19.d }, p3/z, [x7, z18.d, lsl #2] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] \\ Gather load, 32-bit scaled offset \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1W_D | ldff1w { z23.d }, p2/z, [x16, z4.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] \\ Gather load, 32-bit unpacked unscaled offset \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1W_IMM | ldff1w { z24.s }, p6/z, [z24.s] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 11 | 11 | 0.25 | V1UnitL[12], V1UnitSVE01[12], V1UnitV[12] | GLDFF1W_IMM | ldff1w { z20.s }, p0/z, [z6.s, #36] // LDFF1W { <Zt>.S }, <Pg>/Z, [<Zn>.S, #<imm>] \\ Gather load, vector + imm, 32-bit element size \\ 2 11 11 0.25 V1UnitL[12],V1UnitV[12]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1W_D_IMM | ldff1w { z21.d }, p5/z, [z12.d] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
+# CHECK-NEXT: 2 | 9 | 9 | 0.50 | V1UnitL[6], V1UnitSVE01[6], V1UnitV[6] | GLDFF1W_D_IMM | ldff1w { z29.d }, p2/z, [z11.d, #40] // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D, #<imm>] \\ Gather load, vector + imm, 64-bit element size \\ 2 9 9 0.5 V1UnitL[6],V1UnitV[6]
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_IMM | ldnf1b { z17.b }, p5/z, [x20] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_IMM | ldnf1b { z8.b }, p5/z, [x26, #1, mul vl] // LDNF1B { <Zt>.B }, <Pg>/Z, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
# CHECK-NEXT: 1 | 6 | 6 | 2.00 | V1UnitL, V1UnitL01 | LDNF1B_H_IMM | ldnf1b { z4.h }, p3/z, [x25] // LDNF1B { <Zt>.H }, <Pg>/Z, [<Xn|SP>] \\ Contiguous non faulting load, scalar + imm \\ 1 6 6 2.0 V1UnitL01
@@ -5880,123 +5883,123 @@ test:
# CHECK-NEXT: 1 | 4 | 4 | 3.00 | V1UnitL | LDXRH | ldxrh w24, [x11] // LDXRH <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsl w25, w0, #22 // LSL <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsl x27, x7, #56 // LSL <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_B | lsl z1.b, p1/m, z1.b, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_H | lsl z7.h, p3/m, z7.h, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_S | lsl z10.s, p3/m, z10.s, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmI_D | lsl z21.d, p7/m, z21.d, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_B | lsl z13.b, z4.b, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_H | lsl z11.h, z16.h, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_S | lsl z16.s, z11.s, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZZI_D | lsl z18.d, z4.d, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZPmI_B | lsl z1.b, p1/m, z1.b, #3 // LSL <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZPmI_H | lsl z7.h, p3/m, z7.h, #5 // LSL <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZPmI_S | lsl z10.s, p3/m, z10.s, #7 // LSL <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZPmI_D | lsl z21.d, p7/m, z21.d, #28 // LSL <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZZI_B | lsl z13.b, z4.b, #2 // LSL <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZZI_H | lsl z11.h, z16.h, #1 // LSL <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZZI_S | lsl z16.s, z11.s, #6 // LSL <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZZI_D | lsl z18.d, z4.d, #26 // LSL <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVWr | lsl w4, w9, w12 // LSL <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVXr | lsl x7, x29, x22 // LSL <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_ZPmZ_D | lsl z3.d, p2/m, z3.d, z15.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_WIDE_ZPmZ_S | lsl z3.s, p6/m, z3.s, z8.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSL_WIDE_ZZZ_S | lsl z19.s, z25.s, z25.d // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSLR_ZPmZ_H | lslr z3.h, p5/m, z3.h, z23.h // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_ZPmZ_D | lsl z3.d, p2/m, z3.d, z15.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_WIDE_ZPmZ_S | lsl z3.s, p6/m, z3.s, z8.d // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSL_WIDE_ZZZ_S | lsl z19.s, z25.s, z25.d // LSL <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSLR_ZPmZ_H | lslr z3.h, p5/m, z3.h, z23.h // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVWr | lsl w6, w8, w2 // LSLV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSLVXr | lsl x7, x26, x21 // LSLV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsr w0, w0, #30 // LSR <Wd>, <Wn>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | lsr x23, x24, #23 // LSR <Xd>, <Xn>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_B | lsr z21.b, p5/m, z21.b, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_H | lsr z1.h, p4/m, z1.h, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_S | lsr z24.s, p7/m, z24.s, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmI_D | lsr z13.d, p3/m, z13.d, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_B | lsr z3.b, z11.b, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_H | lsr z5.h, z12.h, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_S | lsr z21.s, z16.s, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZZI_D | lsr z21.d, z15.d, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZPmI_B | lsr z21.b, p5/m, z21.b, #3 // LSR <Zdn>.B, <Pg>/M, <Zdn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZPmI_H | lsr z1.h, p4/m, z1.h, #5 // LSR <Zdn>.H, <Pg>/M, <Zdn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZPmI_S | lsr z24.s, p7/m, z24.s, #9 // LSR <Zdn>.S, <Pg>/M, <Zdn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZPmI_D | lsr z13.d, p3/m, z13.d, #4 // LSR <Zdn>.D, <Pg>/M, <Zdn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZZI_B | lsr z3.b, z11.b, #3 // LSR <Zd>.B, <Zn>.B, #<constb> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZZI_H | lsr z5.h, z12.h, #2 // LSR <Zd>.H, <Zn>.H, #<consth> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZZI_S | lsr z21.s, z16.s, #15 // LSR <Zd>.S, <Zn>.S, #<consts> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZZI_D | lsr z21.d, z15.d, #8 // LSR <Zd>.D, <Zn>.D, #<constd> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVWr | lsr w17, w20, w15 // LSR <Wd>, <Wn>, <Wm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVXr | lsr x24, x4, x20 // LSR <Xd>, <Xn>, <Xm> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_ZPmZ_D | lsr z30.d, p3/m, z30.d, z28.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_WIDE_ZPmZ_H | lsr z18.h, p3/m, z18.h, z29.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSR_WIDE_ZZZ_H | lsr z7.h, z30.h, z11.d // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | LSRR_ZPmZ_B | lsrr z14.b, p1/m, z14.b, z16.b // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_ZPmZ_D | lsr z30.d, p3/m, z30.d, z28.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_WIDE_ZPmZ_H | lsr z18.h, p3/m, z18.h, z29.d // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSR_WIDE_ZZZ_H | lsr z7.h, z30.h, z11.d // LSR <Zd>.<T>, <Zn>.<T>, <Zm>.D \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | LSRR_ZPmZ_B | lsrr z14.b, p1/m, z14.b, z16.b // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, shift \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVWr | lsr w0, w28, w19 // LSRV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | LSRVXr | lsr x16, x22, x19 // LSRV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_B | mad z17.b, p7/m, z4.b, z5.b // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_H | mad z29.h, p4/m, z31.h, z18.h // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MAD_ZPmZZ_S | mad z7.s, p4/m, z5.s, z29.s // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MAD_ZPmZZ_D | mad z28.d, p7/m, z18.d, z2.d // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MAD_ZPmZZ_B | mad z17.b, p7/m, z4.b, z5.b // MAD <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MAD_ZPmZZ_H | mad z29.h, p4/m, z31.h, z18.h // MAD <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MAD_ZPmZZ_S | mad z7.s, p4/m, z5.s, z29.s // MAD <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | MAD_ZPmZZ_D | mad z28.d, p7/m, z18.d, z2.d // MAD <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MADDWrrr | madd w15, w9, w9, w29 // MADD <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MADDXrrr | madd x29, x22, x21, x21 // MADD <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv8i16_indexed | mla v15.8h, v22.8h, v4.h[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv2i32_indexed | mla v28.2s, v10.2s, v2.s[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLAv4i32 | mla v31.4s, v18.4s, v27.4s // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_B | mla z1.b, p0/m, z3.b, z3.b // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_H | mla z21.h, p2/m, z31.h, z30.h // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_S | mla z24.s, p3/m, z11.s, z9.s // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MLA_ZPmZZ_D | mla z2.d, p0/m, z12.d, z5.d // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv8i16_indexed | mls v25.8h, v29.8h, v0.h[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv2i32_indexed | mls v22.2s, v29.2s, v0.s[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | MLSv4i32 | mls v26.4s, v5.4s, v28.4s // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_B | mls z11.b, p1/m, z28.b, z6.b // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_H | mls z31.h, p0/m, z25.h, z24.h // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLS_ZPmZZ_S | mls z1.s, p5/m, z7.s, z13.s // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MLS_ZPmZZ_D | mls z2.d, p1/m, z17.d, z10.d // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MLAv8i16_indexed | mla v15.8h, v22.8h, v4.h[3] // MLA <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MLAv2i32_indexed | mla v28.2s, v10.2s, v2.s[0] // MLA <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MLAv4i32 | mla v31.4s, v18.4s, v27.4s // MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLA_ZPmZZ_B | mla z1.b, p0/m, z3.b, z3.b // MLA <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLA_ZPmZZ_H | mla z21.h, p2/m, z31.h, z30.h // MLA <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLA_ZPmZZ_S | mla z24.s, p3/m, z11.s, z9.s // MLA <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | MLA_ZPmZZ_D | mla z2.d, p0/m, z12.d, z5.d // MLA <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MLSv8i16_indexed | mls v25.8h, v29.8h, v0.h[4] // MLS <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MLSv2i32_indexed | mls v22.2s, v29.2s, v0.s[3] // MLS <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MLSv4i32 | mls v26.4s, v5.4s, v28.4s // MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLS_ZPmZZ_B | mls z11.b, p1/m, z28.b, z6.b // MLS <Zda>.B, <Pg>/M, <Zn>.B, <Zm>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLS_ZPmZZ_H | mls z31.h, p0/m, z25.h, z24.h // MLS <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLS_ZPmZZ_S | mls z1.s, p5/m, z7.s, z13.s // MLS <Zda>.S, <Pg>/M, <Zn>.S, <Zm>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | MLS_ZPmZZ_D | mls z2.d, p1/m, z17.d, z10.d // MLS <Zda>.D, <Pg>/M, <Zn>.D, <Zm>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MSUBWrrr | mneg w14, w30, w30 // MNEG <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MSUBXrrr | mneg x21, x3, x9 // MNEG <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmV_S | mov z9.s, p2/m, s10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_B | mov z17.b, z29.b[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_H | mov z26.h, z7.h[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z14.s, z21.s[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_D | mov z22.d, z14.d[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZZI_S | mov z21.s, s25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmV_S | mov z9.s, p2/m, s10 // MOV <Zd>.<T>, <Pg>/M, <V><n> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_B | mov z17.b, z29.b[38] // MOV <Zd>.B, <Zn>.B[<immb>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_H | mov z26.h, z7.h[16] // MOV <Zd>.H, <Zn>.H[<immh>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_S | mov z14.s, z21.s[13] // MOV <Zd>.S, <Zn>.S[<imms>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_D | mov z22.d, z14.d[2] // MOV <Zd>.D, <Zn>.D[<immd>] \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZZI_S | mov z21.s, s25 // MOV <Zd>.<T>, <V><n> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWri | orr wsp, wzr, #0xe00 // MOV <Wd|WSP>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x3, #7680 // MOV <Xd|SP>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi8lane | mov v30.b[12], v17.b[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi16lane | mov v10.h[3], v17.h[5] // MOV <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi32lane | mov v19.s[2], v2.s[1] // MOV <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | INSvi64lane | mov v21.d[1], v16.d[0] // MOV <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi8gpr | mov v5.b[12], w23 // MOV <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi16gpr | mov v27.h[6], w6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi32gpr | mov v21.s[0], w21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV | INSvi64gpr | mov v13.d[0], x10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_B | mov z30.b, p7/m, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_D | mov z30.d, p7/m, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPmI_H | mov z10.h, p5/m, #72 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_B | mov z19.b, p6/z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z6.d, p1/z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | CPY_ZPzI_D | mov z12.d, p7/z, #10240 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z30.b, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z2.h, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z20.h, #20992 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi8lane | mov v30.b[12], v17.b[14] // MOV <Vd>.B[<index1b>], <Vn>.B[<index2b>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi16lane | mov v10.h[3], v17.h[5] // MOV <Vd>.H[<index1h>], <Vn>.H[<index2h>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi32lane | mov v19.s[2], v2.s[1] // MOV <Vd>.S[<index1s>], <Vn>.S[<index2s>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | INSvi64lane | mov v21.d[1], v16.d[0] // MOV <Vd>.D[<index1d>], <Vn>.D[<index2d>] \\ ASIMD insert, element to element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi8gpr | mov v5.b[12], w23 // MOV <Vd>.B[<indexb>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi16gpr | mov v27.h[6], w6 // MOV <Vd>.H[<indexh>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi32gpr | mov v21.s[0], w21 // MOV <Vd>.S[<indexs>], W<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01, V1UnitV | INSvi64gpr | mov v13.d[0], x10 // MOV <Vd>.D[<indexd>], X<n> \\ ASIMD transfer, gen reg to element \\ 2 5 5 1.0 V1UnitM0,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_B | mov z30.b, p7/m, #77 // MOV <Zd>.B, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_D | mov z30.d, p7/m, #-89 // MOV <Zd>.<T>, <Pg>/M, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmI_H | mov z10.h, p5/m, #72 // MOV <Zd>.<T>, <Pg>/M, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPzI_B | mov z19.b, p6/z, #0 // MOV <Zd>.B, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPzI_D | mov z6.d, p1/z, #-109 // MOV <Zd>.<T>, <Pg>/Z, #<imm> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | CPY_ZPzI_D | mov z12.d, p7/z, #10240 // MOV <Zd>.<T>, <Pg>/Z, #<imm>, <shift> \\ Copy, scalar SIMD&FP or imm \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_B | mov z30.b, #-31 // MOV <Zd>.B, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_H | mov z2.h, #-56 // MOV <Zd>.<T>, #<imm> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_H | mov z20.h, #20992 // MOV <Zd>.<T>, #<imm>, <shift> \\ Duplicate, immediate and indexed form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w24, #3584 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x15, #3584 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SEL_PPPP | mov p0.b, p0/m, p6.b // MOV <Pd>.B, <Pg>/M, <Pn>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | AND_PPzPP | mov p3.b, p7/z, p2.b // MOV <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | mov w21, w11 // MOV <Wd>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | mov x14, x0 // MOV <Xd>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi8 | mov b15, v21.b[8] // MOV B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi16 | mov h13, v17.h[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi32 | mov s7, v11.s[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | DUPi64 | mov d27, v24.d[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_D | mov z12.d, p5/m, x24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
-# CHECK-NEXT: 2 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitV, V1UnitV01 | CPY_ZPmR_D | mov z31.d, p6/m, sp // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 2 5 5 1.0 V1UnitM0,V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi8 | mov b15, v21.b[8] // MOV B<d>, <Vn>.B[<indexb>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi16 | mov h13, v17.h[3] // MOV H<d>, <Vn>.H[<indexh>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi32 | mov s7, v11.s[0] // MOV S<d>, <Vn>.S[<indexs>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | DUPi64 | mov d27, v24.d[1] // MOV D<d>, <Vn>.D[<indexd>] \\ ASIMD duplicate, element \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 3 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmR_D | mov z12.d, p5/m, x24 // MOV <Zd>.<T>, <Pg>/M, <R><n> \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 3 | 5 | 5 | 1.00 | V1UnitI, V1UnitM, V1UnitM0, V1UnitSVE01[2], V1UnitV[2] | CPY_ZPmR_D | mov z31.d, p6/m, sp // MOV <Zd>.<T>, <Pg>/M, <R2>SP \\ Copy, scalar \\ 3 5 5 1.0 V1UnitM0,V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_B | mov z19.b, w27 // MOV <Zd>.<T>, <R><n> \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | DUP_ZR_H | mov z17.h, wsp // MOV <Zd>.<T>, <R2>SP \\ Duplicate, scalar form \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi32 | mov w13, v12.s[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi64_idx0 | mov x30, v18.d[0] // MOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi32 | mov w13, v12.s[2] // MOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi64_idx0 | mov x30, v18.d[0] // MOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ADDWri | mov wsp, wsp // MOV <Wd|WSP>, <Wn|WSP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | mov x1, x11 // MOV <Xd|SP>, <Xn|SP> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv16i8 | mov v12.16b, v6.16b // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SEL_ZPZZ_D | mov z1.d, p3/m, z6.d // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZZZ | mov z24.d, z25.d // MOV <Zd>.D, <Zn>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORRv16i8 | mov v12.16b, v6.16b // MOV <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SEL_ZPZZ_D | mov z1.d, p3/m, z6.d // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> \\ Select, vector form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZZZ | mov z24.d, z25.d // MOV <Zd>.D, <Zn>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w30, #3584 // MOV <Wd>, #<imms> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x4, #3584 // MOV <Xd>, #<immd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_B | mov z14.b, #112 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_H | mov z8.h, #96 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_S | mov z2.s, #2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | DUP_ZI_D | mov z6.d, #4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_B | mov z14.b, #112 // MOV <Zd>.B, #<constb> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_H | mov z8.h, #96 // MOV <Zd>.H, #<consth> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_S | mov z2.s, #2 // MOV <Zd>.S, #<consts> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | DUP_ZI_D | mov z6.d, #4 // MOV <Zd>.D, #<constd> \\ Broadcast logical bitmask immediate to vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORR_PPzPP | mov p2.b, p5.b // MOV <Pd>.B, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv16b_ns | movi v7.16b, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv8i16 | movi v14.8h, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv4i16 | movi v13.4h, #74, lsl #8 // MOVI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2i32 | movi v19.2s, #226 // MOVI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2i32 | movi v0.2s, #137, lsl #24 // MOVI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv4s_msl | movi v1.4s, #122, msl #8 // MOVI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVID | movi d16, #0000000000000000 // MOVI <Dd>, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MOVIv2d_ns | movi v13.2d, #0xff00ff00ff00ff00 // MOVI <Vd>.2D, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv16b_ns | movi v7.16b, #177 // MOVI <Vd>.<Tb>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv8i16 | movi v14.8h, #174 // MOVI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv4i16 | movi v13.4h, #74, lsl #8 // MOVI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv2i32 | movi v19.2s, #226 // MOVI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv2i32 | movi v0.2s, #137, lsl #24 // MOVI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv4s_msl | movi v1.4s, #122, msl #8 // MOVI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVID | movi d16, #0000000000000000 // MOVI <Dd>, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MOVIv2d_ns | movi v13.2d, #0xff00ff00ff00ff00 // MOVI <Vd>.2D, #<imm> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKWi | movk w8, #57951 // MOVK <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKWi | movk w6, #34540 // MOVK <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVKXi | movk x1, #56641 // MOVK <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
@@ -6005,10 +6008,10 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNWi | mov w27, #-47743 // MOVN <Wd>, #<imm>, LSL #<shifts> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNXi | mov x10, #-63432 // MOVN <Xd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVNXi | mov x0, #2116973299840843775 // MOVN <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | MOVPRFX_ZPmZ_B | movprfx z22.b, p0/m, z4.b // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MLA_ZPmZZ_B | mla z22.b, p0/m, z19.b, z25.b // Ignore
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | MOVPRFX_ZZ | movprfx z3, z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 2 | 2.00 | V1UnitV, V1UnitV01 | FMLA_ZPmZZ_D | fmla z3.d, p0/m, z8.d, z19.d // Ignore
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | MOVPRFX_ZPmZ_B | movprfx z22.b, p0/m, z4.b // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> \\ Move prefix \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MLA_ZPmZZ_B | mla z22.b, p0/m, z19.b, z25.b // Ignore
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | MOVPRFX_ZZ | movprfx z3, z26 // MOVPRFX <Zd>, <Zn> \\ Move prefix \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | FMLA_ZPmZZ_D | fmla z3.d, p0/m, z8.d, z19.d // Ignore
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ANDS_PPzPP | movs p0.b, p7/z, p3.b // MOVS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORRS_PPzPP | movs p4.b, p0.b // MOVS <Pd>.B, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZWi | mov w3, #9629 // MOVZ <Wd>, #<imm> \\ Move immed \\ 1 1 1 4.0 V1UnitI
@@ -6017,41 +6020,41 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | MOVZXi | mov x11, #5760103923406864384 // MOVZ <Xd>, #<imm>, LSL #<shiftd> \\ Move immed \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MRS | mrs x4, ACTLR_EL1 // MRS <Xt>, <systemreg> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MRS | mrs x14, S2_4_C0_C5_4 // MRS <Xt>, S<op0>_<op1>_<Cn>_<Cm>_<op2> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_B | msb z18.b, p1/m, z27.b, z0.b // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_H | msb z27.h, p5/m, z23.h, z1.h // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 2 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MSB_ZPmZZ_S | msb z26.s, p2/m, z0.s, z2.s // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 1 4 2 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 2 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MSB_ZPmZZ_D | msb z1.d, p6/m, z12.d, z12.d // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 1 5 2 0.5 V1UnitV0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MSB_ZPmZZ_B | msb z18.b, p1/m, z27.b, z0.b // MSB <Zdn>.B, <Pg>/M, <Zm>.B, <Za>.B \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MSB_ZPmZZ_H | msb z27.h, p5/m, z23.h, z1.h // MSB <Zdn>.H, <Pg>/M, <Zm>.H, <Za>.H \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 2 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MSB_ZPmZZ_S | msb z26.s, p2/m, z0.s, z2.s // MSB <Zdn>.S, <Pg>/M, <Zm>.S, <Za>.S \\ Multiply accumulate, B, H, S element size \\ 2 4 2 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 2 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | MSB_ZPmZZ_D | msb z1.d, p6/m, z12.d, z12.d // MSB <Zdn>.D, <Pg>/M, <Zm>.D, <Za>.D \\ Multiply accumulate, D element size \\ 2 5 2 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSRpstateImm4 | msr DAIFSet, #0 // MSR <pstatefield1>, #<imm1> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSRpstateImm4 | msr SPSel, #0 // MSR <pstatefield2>, #<imm2> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSR | msr ACTLR_EL3, x18 // MSR <systemreg>, <Xt> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | MSR | msr S3_6_C8_C12_1, x23 // MSR S<op0>_<op1>_<Cn>_<Cm>_<op2>, <Xt> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MSUBWrrr | msub w6, w26, w13, w13 // MSUB <Wd>, <Wn>, <Wm>, <Wa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | MSUBXrrr | msub x14, x28, x9, x3 // MSUB <Xd>, <Xn>, <Xm>, <Xa> \\ Multiply accumulate, X-form \\ 1 2 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv4i16_indexed | mul v26.4h, v20.4h, v14.h[5] // MUL <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv8i16_indexed | mul v5.8h, v21.8h, v3.h[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv2i32_indexed | mul v29.2s, v10.2s, v3.s[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv4i32_indexed | mul v30.4s, v11.4s, v4.s[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_B | mul z16.b, z16.b, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_H | mul z9.h, z9.h, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZI_S | mul z23.s, z23.s, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MUL_ZI_D | mul z15.d, z15.d, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | MULv8i16 | mul v3.8h, v9.8h, v8.8h // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_B | mul z17.b, p6/m, z17.b, z9.b // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_H | mul z18.h, p7/m, z18.h, z15.h // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | MUL_ZPmZ_S | mul z29.s, p6/m, z29.s, z8.s // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | MUL_ZPmZ_D | mul z25.d, p1/m, z25.d, z25.d // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MULv4i16_indexed | mul v26.4h, v20.4h, v14.h[5] // MUL <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MULv8i16_indexed | mul v5.8h, v21.8h, v3.h[7] // MUL <Vd>.8H, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MULv2i32_indexed | mul v29.2s, v10.2s, v3.s[1] // MUL <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MULv4i32_indexed | mul v30.4s, v11.4s, v4.s[0] // MUL <Vd>.4S, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MUL_ZI_B | mul z16.b, z16.b, #-118 // MUL <Zdn>.B, <Zdn>.B, #<imm> \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MUL_ZI_H | mul z9.h, z9.h, #-56 // MUL <Zdn>.H, <Zdn>.H, #<imm> \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MUL_ZI_S | mul z23.s, z23.s, #74 // MUL <Zdn>.S, <Zdn>.S, #<imm> \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | MUL_ZI_D | mul z15.d, z15.d, #20 // MUL <Zdn>.D, <Zdn>.D, #<imm> \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | MULv8i16 | mul v3.8h, v9.8h, v8.8h // MUL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MUL_ZPmZ_B | mul z17.b, p6/m, z17.b, z9.b // MUL <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MUL_ZPmZ_H | mul z18.h, p7/m, z18.h, z15.h // MUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | MUL_ZPmZ_S | mul z29.s, p6/m, z29.s, z8.s // MUL <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | MUL_ZPmZ_D | mul z25.d, p1/m, z25.d, z25.d // MUL <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MADDWrrr | mul w8, w13, w20 // MUL <Wd>, <Wn>, <Wm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | MADDXrrr | mul x12, x8, x25 // MUL <Xd>, <Xn>, <Xm> \\ Multiply \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | mvn w0, w18 // MVN <Wd>, <Wm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | mvn w25, w27, asr #6 // MVN <Wd>, <Wm>, <shift> #<wamount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | mvn x1, x21 // MVN <Xd>, <Xm> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | mvn x9, x23, asr #39 // MVN <Xd>, <Xm>, <shift> #<amount> \\ Move, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NOTv16i8 | mvn v16.16b, v24.16b // MVN <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4i16 | mvni v9.4h, #237 // MVNI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv8i16 | mvni v8.8h, #171, lsl #8 // MVNI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv2i32 | mvni v7.2s, #81 // MVNI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4i32 | mvni v22.4s, #15, lsl #8 // MVNI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | MVNIv4s_msl | mvni v12.4s, #141, msl #8 // MVNI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | NOTv16i8 | mvn v16.16b, v24.16b // MVN <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MVNIv4i16 | mvni v9.4h, #237 // MVNI <Vd>.<Th>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MVNIv8i16 | mvni v8.8h, #171, lsl #8 // MVNI <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MVNIv2i32 | mvni v7.2s, #81 // MVNI <Vd>.<Ts>, #<imm8> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MVNIv4i32 | mvni v22.4s, #15, lsl #8 // MVNI <Vd>.<Ts>, #<imm8>, LSL #<amounts> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | MVNIv4s_msl | mvni v12.4s, #141, msl #8 // MVNI <Vd>.<Ts>, #<imm8>, MSL #<amountones> \\ ASIMD move, integer immed \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | NAND_PPzPP | nand p5.b, p4/z, p5.b, p5.b // NAND <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | NANDS_PPzPP | nands p6.b, p3/z, p4.b, p5.b // NANDS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrs | neg w25, w20, lsl #4 // NEG <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
@@ -6060,9 +6063,9 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | neg x29, x11, lsl #3 // NEG <Xd>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | neg x24, x10, lsl #54 // NEG <Xd>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | neg x0, x16, lsr #2 // NEG <Xd>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NEGv1i64 | neg d18, d20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NEGv2i64 | neg v16.2d, v14.2d // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | NEG_ZPmZ_B | neg z16.b, p2/m, z15.b // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | NEGv1i64 | neg d18, d20 // NEG <V><d>, <V><n> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | NEGv2i64 | neg v16.2d, v14.2d // NEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | NEG_ZPmZ_B | neg z16.b, p2/m, z15.b // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrs | negs w30, w22, lsl #2 // NEGS <Wd>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | negs w8, w8, lsl #15 // NEGS <Wd>, <Wm>, LSL #<wamounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSWrs | negs w12, w21, asr #15 // NEGS <Wd>, <Wm>, <shift> #<wamount> \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
@@ -6077,44 +6080,44 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | NOR_PPzPP | nor p4.b, p4/z, p0.b, p4.b // NOR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | NORS_PPzPP | nors p1.b, p0/z, p7.b, p6.b // NORS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | EOR_PPzPP | not p7.b, p2/z, p6.b // NOT <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | NOT_ZPmZ_S | not z29.s, p4/m, z9.s // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | NOTv8i8 | mvn v15.8b, v29.8b // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | NOT_ZPmZ_S | not z29.s, p4/m, z9.s // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | NOTv8i8 | mvn v15.8b, v29.8b // NOT <Vd>.<T>, <Vn>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | EORS_PPzPP | nots p7.b, p3/z, p1.b // NOTS <Pd>.B, <Pg>/Z, <Pn>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z5.b, z5.b, #0x8f // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z14.h, z14.h, #0xff9f // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z14.s, z14.s, #0xfffffffd // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z27.d, z27.d, #0xfffffffffffffffb // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z5.b, z5.b, #0x8f // ORN <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z14.h, z14.h, #0xff9f // ORN <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z14.s, z14.s, #0xfffffffd // ORN <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z27.d, z27.d, #0xfffffffffffffffb // ORN <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORN_PPzPP | orn p1.b, p2/z, p3.b, p5.b // ORN <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | orn w2, w27, w7 // ORN <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNWrs | orn w6, w28, w14, lsl #19 // ORN <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | orn x22, x12, x3 // ORN <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORNXrs | orn x19, x17, x0, lsl #58 // ORN <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORNv8i8 | orn v29.8b, v19.8b, v16.8b // ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORNv8i8 | orn v29.8b, v19.8b, v16.8b // ORN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORNS_PPzPP | orns p3.b, p3/z, p0.b, p3.b // ORNS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWri | orr wsp, w27, #0xe00 // ORR <Wd|WSP>, <Wn>, #<imms> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXri | orr x27, x6, #0x1e00 // ORR <Xd|SP>, <Xn>, #<immd> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z4.b, z4.b, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z26.h, z26.h, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z3.s, z3.s, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZI | orr z30.d, z30.d, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z4.b, z4.b, #0x70 // ORR <Zdn>.B, <Zdn>.B, #<constb> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z26.h, z26.h, #0x60 // ORR <Zdn>.H, <Zdn>.H, #<consth> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z3.s, z3.s, #0x2 // ORR <Zdn>.S, <Zdn>.S, #<consts> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZI | orr z30.d, z30.d, #0x4 // ORR <Zdn>.D, <Zdn>.D, #<constd> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ORR_PPzPP | orr p6.b, p4/z, p4.b, p3.b // ORR <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical \\ 1 1 1 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | orr w14, w1, w23 // ORR <Wd>, <Wn>, <Wm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRWrs | orr w25, w22, w0, asr #20 // ORR <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | orr x11, x6, x13 // ORR <Xd>, <Xn>, <Xm> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | ORRXrs | orr x26, x26, x7, lsl #62 // ORR <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, no flagset \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i16 | orr v9.4h, #18 // ORR <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv8i16 | orr v20.8h, #175 // ORR <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i32 | orr v4.4s, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv4i32 | orr v17.4s, #119, lsl #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ORRv16i8 | orr v12.16b, v9.16b, v1.16b // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZPmZ_H | orr z28.h, p3/m, z28.h, z7.h // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | ORR_ZZZ | orr z8.d, z14.d, z19.d // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORRv4i16 | orr v9.4h, #18 // ORR <Vd>.<Th>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORRv8i16 | orr v20.8h, #175 // ORR <Vd>.<Th>, #<imm8>, LSL #<amounth> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORRv4i32 | orr v4.4s, #0 // ORR <Vd>.<T>, #<imm8> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORRv4i32 | orr v17.4s, #119, lsl #24 // ORR <Vd>.<T>, #<imm8>, LSL #<amount> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ORRv16i8 | orr v12.16b, v9.16b, v1.16b // ORR <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD logical \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZPmZ_H | orr z28.h, p3/m, z28.h, z7.h // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | ORR_ZZZ | orr z8.d, z14.d, z19.d // ORR <Zd>.D, <Zn>.D, <Zm>.D \\ Logical \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | ORRS_PPzPP | orrs p7.b, p7/z, p6.b, p5.b // ORRS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B \\ Predicate logical, flag setting \\ 1 2 2 0.5 V1UnitM0[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[4], V1UnitV01[4] | ORV_VPZ_D | orv d19, p6, z31.d // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 1 12 12 0.5 V1UnitV01[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE01[8], V1UnitV[8] | ORV_VPZ_D | orv d19, p6, z31.d // ORV <V><d>, <Pg>, <Zn>.<T> \\ Reduction, logical \\ 2 12 12 0.5 V1UnitSVE01[8],V1UnitSVE01[8]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PFALSE | pfalse p6.b // PFALSE <Pd>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PFIRST_B | pfirst p0.b, p5, p0.b // PFIRST <Pdn>.B, <Pg>, <Pdn>.B \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | PMULv8i8 | pmul v30.8b, v0.8b, v27.8b // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV01 | PMULv16i8 | pmul v7.16b, v20.16b, v18.16b // PMUL <Vd>.16B, <Vn>.16B, <Vm>.16B \\ ASIMD multiply/multiply long (8x8) polynomial, Q-form \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | PMULv8i8 | pmul v30.8b, v0.8b, v27.8b // PMUL <Vd>.8B, <Vn>.8B, <Vm>.8B \\ ASIMD multiply/multiply long (8x8) polynomial, D-form \\ 1 3 3 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | PMULv16i8 | pmul v7.16b, v20.16b, v18.16b // PMUL <Vd>.16B, <Vn>.16B, <Vm>.16B \\ ASIMD multiply/multiply long (8x8) polynomial, Q-form \\ 1 3 3 2.0 V1UnitV01
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PNEXT_S | pnext p5.s, p5, p5.s // PNEXT <Pdn>.<T>, <Pv>, <Pdn>.<T> \\ Predicate set/initialize/find next \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb #14, p5, [x21] // PRFB #<imm4>, <Pg>, [<Xn|SP>] \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01 | PRFB_PRI | prfb #14, p3, [x28, #-24, mul vl] // PRFB #<imm4>, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ No description \\ No scheduling info
@@ -6215,12 +6218,12 @@ test:
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | PTRUES_D | ptrues p2.d, vl128 // PTRUES <Pd>.<T>, #<uimm5> \\ Predicate set/initialize, set flags \\ 1 3 3 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PUNPKHI_PP | punpkhi p4.h, p4.b // PUNPKHI <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | PUNPKLO_PP | punpklo p1.h, p4.b // PUNPKLO <Pd>.H, <Pn>.B \\ Predicate unpack and widen \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RADDHNv2i64_v2i32 | raddhn v17.2s, v22.2d, v5.2d // RADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RADDHNv2i64_v4i32 | raddhn2 v21.4s, v11.2d, v1.2d // RADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RBITv16i8 | rbit v16.16b, v21.16b // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | RADDHNv2i64_v2i32 | raddhn v17.2s, v22.2d, v5.2d // RADDHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | RADDHNv2i64_v4i32 | raddhn2 v21.4s, v11.2d, v1.2d // RADDHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | RBITv16i8 | rbit v16.16b, v21.16b // RBIT <Vd>.<T>, <Vn>.<T> \\ ASIMD bit reverse \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RBITWr | rbit w27, w10 // RBIT <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RBITXr | rbit x30, x0 // RBIT <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | RBIT_ZPmZ_S | rbit z23.s, p3/m, z10.s // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | RBIT_ZPmZ_S | rbit z23.s, p3/m, z10.s // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Count/reverse bits \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | RDFFR_PPz | rdffr p2.b, p1/z // RDFFR <Pd>.B, <Pg>/Z \\ Read first fault register, predicated \\ 1 3 3 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | RDFFR_P | rdffr p5.b // RDFFR <Pd>.B \\ Read first fault register, unpredicated \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 4 | 4 | 0.33 | V1UnitI[6], V1UnitM[6] | RDFFRS_PPz | rdffrs p7.b, p2/z // RDFFRS <Pd>.B, <Pg>/Z \\ Read first fault register and set flags \\ 1 4 4 0.33 V1UnitM[6]
@@ -6228,54 +6231,54 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | RET | ret // RET \\ Branch, register \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | RET | ret x14 // RET {<Xn>} \\ Branch, register \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | REV_PP_H | rev p1.h, p2.h // REV <Pd>.<T>, <Pn>.<T> \\ Predicate reverse \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REV_ZZ_D | rev z11.d, z24.d // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | REV_ZZ_D | rev z11.d, z24.d // REV <Zd>.<T>, <Zn>.<T> \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVWr | rev w19, w20 // REV <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVXr | rev x30, x15 // REV <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV16v16i8 | rev16 v5.16b, v26.16b // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | REV16v16i8 | rev16 v5.16b, v26.16b // REV16 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV16Wr | rev16 w1, w25 // REV16 <Wd>, <Wn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV16Xr | rev16 x27, x11 // REV16 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV32v8i16 | rev32 v22.8h, v4.8h // REV32 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | REV32v8i16 | rev32 v22.8h, v4.8h // REV32 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REV32Xr | rev32 x30, x6 // REV32 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | REVXr | rev x5, x2 // REV64 <Xd>, <Xn> \\ Reverse bits/bytes \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | REV64v2i32 | rev64 v0.2s, v19.2s // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVB_ZPmZ_D | revb z3.d, p2/m, z21.d // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVH_ZPmZ_D | revh z1.d, p5/m, z19.d // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | REVW_ZPmZ_D | revw z16.d, p1/m, z3.d // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | REV64v2i32 | rev64 v0.2s, v19.2s // REV64 <Vd>.<T>, <Vn>.<T> \\ ASIMD reverse \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | REVB_ZPmZ_D | revb z3.d, p2/m, z21.d // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | REVH_ZPmZ_D | revh z1.d, p5/m, z19.d // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | REVW_ZPmZ_D | revw z16.d, p1/m, z3.d // REVW <Zd>.D, <Pg>/M, <Zn>.D \\ Reverse, vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRWrri | ror w20, w13, #21 // ROR <Wd>, <Ws>, #<shifts> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | EXTRXrri | ror x5, x8, #7 // ROR <Xd>, <Xs>, #<shiftd> \\ Move, shift by immed, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVWr | ror w29, w26, w0 // ROR <Wd>, <Wn>, <Wm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVXr | ror x4, x13, x3 // ROR <Xd>, <Xn>, <Xm> \\ Move, shift by register, no flagset, unconditional \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVWr | ror w26, w0, w28 // RORV <Wd>, <Wn>, <Wm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | RORVXr | ror x21, x29, x17 // RORV <Xd>, <Xn>, <Xm> \\ Variable shift \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv8i8_shift | rshrn v24.8b, v0.8h, #4 // RSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv4i16_shift | rshrn v8.4h, v24.4s, #16 // RSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv2i32_shift | rshrn v12.2s, v12.2d, #28 // RSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv16i8_shift | rshrn2 v1.16b, v16.8h, #6 // RSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv8i16_shift | rshrn2 v1.8h, v28.4s, #3 // RSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | RSHRNv4i32_shift | rshrn2 v20.4s, v19.2d, #14 // RSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RSUBHNv8i16_v8i8 | rsubhn v3.8b, v9.8h, v16.8h // RSUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | RSUBHNv2i64_v4i32 | rsubhn2 v31.4s, v12.2d, v15.2d // RSUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABAv16i8 | saba v8.16b, v27.16b, v13.16b // SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABALv2i32_v2i64 | sabal v2.2d, v5.2s, v31.2s // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SABALv4i32_v2i64 | sabal2 v21.2d, v15.4s, v13.4s // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDv2i32 | sabd v12.2s, v11.2s, v27.2s // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SABD_ZPmZ_S | sabd z14.s, p1/m, z14.s, z23.s // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDLv2i32_v2i64 | sabdl v28.2d, v4.2s, v19.2s // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SABDLv16i8_v8i16 | sabdl2 v10.8h, v30.16b, v4.16b // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SADALPv8i8_v4i16 | sadalp v3.4h, v5.8b // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLv8i8_v8i16 | saddl v7.8h, v3.8b, v23.8b // SADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLv8i16_v4i32 | saddl2 v21.4s, v5.8h, v10.8h // SADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDLPv16i8_v8i16 | saddlp v13.8h, v29.16b // SADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv8i8v | saddlv h18, v28.8b // SADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SADDLVv16i8v | saddlv h30, v4.16b // SADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv4i16v | saddlv s24, v29.4h // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv8i16v | saddlv s22, v23.8h // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SADDLVv4i32v | saddlv d2, v27.4s // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SADDV_VPZ_B | saddv d19, p6, z1.b // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SADDV_VPZ_H | saddv d7, p2, z14.h // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SADDV_VPZ_S | saddv d4, p7, z27.s // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDWv4i16_v4i32 | saddw v8.4s, v0.4s, v1.4h // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SADDWv16i8_v8i16 | saddw2 v24.8h, v10.8h, v30.16b // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | RSHRNv8i8_shift | rshrn v24.8b, v0.8h, #4 // RSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | RSHRNv4i16_shift | rshrn v8.4h, v24.4s, #16 // RSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | RSHRNv2i32_shift | rshrn v12.2s, v12.2d, #28 // RSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | RSHRNv16i8_shift | rshrn2 v1.16b, v16.8h, #6 // RSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | RSHRNv8i16_shift | rshrn2 v1.8h, v28.4s, #3 // RSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | RSHRNv4i32_shift | rshrn2 v20.4s, v19.2d, #14 // RSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | RSUBHNv8i16_v8i8 | rsubhn v3.8b, v9.8h, v16.8h // RSUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | RSUBHNv2i64_v4i32 | rsubhn2 v31.4s, v12.2d, v15.2d // RSUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SABAv16i8 | saba v8.16b, v27.16b, v13.16b // SABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SABALv2i32_v2i64 | sabal v2.2d, v5.2s, v31.2s // SABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SABALv4i32_v2i64 | sabal2 v21.2d, v15.4s, v13.4s // SABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SABDv2i32 | sabd v12.2s, v11.2s, v27.2s // SABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SABD_ZPmZ_S | sabd z14.s, p1/m, z14.s, z23.s // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SABDLv2i32_v2i64 | sabdl v28.2d, v4.2s, v19.2s // SABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SABDLv16i8_v8i16 | sabdl2 v10.8h, v30.16b, v4.16b // SABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SADALPv8i8_v4i16 | sadalp v3.4h, v5.8b // SADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SADDLv8i8_v8i16 | saddl v7.8h, v3.8b, v23.8b // SADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SADDLv8i16_v4i32 | saddl2 v21.4s, v5.8h, v10.8h // SADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SADDLPv16i8_v8i16 | saddlp v13.8h, v29.16b // SADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SADDLVv8i8v | saddlv h18, v28.8b // SADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | SADDLVv16i8v | saddlv h30, v4.16b // SADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SADDLVv4i16v | saddlv s24, v29.4h // SADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SADDLVv8i16v | saddlv s22, v23.8h // SADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SADDLVv4i32v | saddlv d2, v27.4s // SADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 14 | 14 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SADDV_VPZ_B | saddv d19, p6, z1.b // SADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SADDV_VPZ_H | saddv d7, p2, z14.h // SADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SADDV_VPZ_S | saddv d4, p7, z27.s // SADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SADDWv4i16_v4i32 | saddw v8.4s, v0.4s, v1.4h // SADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SADDWv16i8_v8i16 | saddw2 v24.8h, v10.8h, v30.16b // SADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCWr | sbc w0, w16, w1 // SBC <Wd>, <Wn>, <Wm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBCXr | sbc x19, x3, x9 // SBC <Xd>, <Xn>, <Xm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SBCSWr | sbcs w26, w28, w0 // SBCS <Wd>, <Wn>, <Wm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -6298,143 +6301,143 @@ test:
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXHri | scvtf h21, x12 // SCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXSri | scvtf s25, x28 // SCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SCVTFUXDri | scvtf d12, x0 // SCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFh | scvtf h4, h8, #9 // SCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFs | scvtf s29, s12, #1 // SCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFd | scvtf d1, d12, #26 // SCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4i16_shift | scvtf v25.4h, v13.4h, #8 // SCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | SCVTFv8i16_shift | scvtf v4.8h, v8.8h, #10 // SCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2i32_shift | scvtf v5.2s, v2.2s, #26 // SCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4i32_shift | scvtf v2.4s, v24.4s, #10 // SCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2i64_shift | scvtf v11.2d, v2.2d, #42 // SCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i16 | scvtf h5, h14 // SCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i32 | scvtf s5, s16 // SCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv1i64 | scvtf d12, d11 // SCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4f16 | scvtf v22.4h, v10.4h // SCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | SCVTFv8f16 | scvtf v16.8h, v13.8h // SCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2f32 | scvtf v9.2s, v31.2s // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | SCVTFv4f32 | scvtf v2.4s, v7.4s // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SCVTFv2f64 | scvtf v18.2d, v11.2d // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | SCVTF_ZPmZ_HtoH | scvtf z3.h, p3/m, z29.h // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoH | scvtf z1.h, p5/m, z27.s // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoS | scvtf z30.s, p4/m, z29.s // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SCVTF_ZPmZ_StoD | scvtf z18.d, p3/m, z16.s // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoH | scvtf z18.h, p1/m, z14.d // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoS | scvtf z10.s, p1/m, z11.d // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SCVTF_ZPmZ_DtoD | scvtf z3.d, p2/m, z27.d // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFh | scvtf h4, h8, #9 // SCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFs | scvtf s29, s12, #1 // SCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFd | scvtf d1, d12, #26 // SCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTFv4i16_shift | scvtf v25.4h, v13.4h, #8 // SCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | SCVTFv8i16_shift | scvtf v4.8h, v8.8h, #10 // SCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv2i32_shift | scvtf v5.2s, v2.2s, #26 // SCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTFv4i32_shift | scvtf v2.4s, v24.4s, #10 // SCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv2i64_shift | scvtf v11.2d, v2.2d, #42 // SCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv1i16 | scvtf h5, h14 // SCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv1i32 | scvtf s5, s16 // SCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv1i64 | scvtf d12, d11 // SCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTFv4f16 | scvtf v22.4h, v10.4h // SCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | SCVTFv8f16 | scvtf v16.8h, v13.8h // SCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv2f32 | scvtf v9.2s, v31.2s // SCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTFv4f32 | scvtf v2.4s, v7.4s // SCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SCVTFv2f64 | scvtf v18.2d, v11.2d // SCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | SCVTF_ZPmZ_HtoH | scvtf z3.h, p3/m, z29.h // SCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | SCVTF_ZPmZ_StoH | scvtf z1.h, p5/m, z27.s // SCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | SCVTF_ZPmZ_StoS | scvtf z30.s, p4/m, z29.s // SCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | SCVTF_ZPmZ_StoD | scvtf z18.d, p3/m, z16.s // SCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTF_ZPmZ_DtoH | scvtf z18.h, p1/m, z14.d // SCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTF_ZPmZ_DtoS | scvtf z10.s, p1/m, z11.d // SCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SCVTF_ZPmZ_DtoD | scvtf z3.d, p2/m, z27.d // SCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
# CHECK-NEXT: 1 | 12 | 12 | 0.08 | V1UnitI[12], V1UnitM[12], V1UnitM0[12] | SDIVWr | sdiv w6, w28, w24 // SDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitI[20], V1UnitM[20], V1UnitM0[20] | SDIVXr | sdiv x19, x2, x14 // SDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
-# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | SDIV_ZPmZ_S | sdiv z24.s, p1/m, z24.s, z14.s // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
-# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | SDIV_ZPmZ_D | sdiv z7.d, p6/m, z7.d, z20.d // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
-# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | SDIVR_ZPmZ_S | sdivr z10.s, p2/m, z10.s, z7.s // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
-# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | SDIVR_ZPmZ_D | sdivr z0.d, p3/m, z0.d, z9.d // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | SDOT_ZZZI_S | sdot z6.s, z29.b, z0.b[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SDOT_ZZZI_D | sdot z0.d, z18.h, z10.h[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | SDOT_ZZZ_S | sdot z28.s, z30.b, z14.b // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SDOT_ZZZ_D | sdot z19.d, z5.h, z8.h // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SDOTlanev16i8 | sdot v2.4s, v27.16b, v5.4b[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SDOTv8i8 | sdot v3.2s, v20.8b, v10.8b // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 2 | 12 | 12 | 0.09 | V1UnitSVE0[22], V1UnitSVE01[22], V1UnitV[22], V1UnitV02[22] | SDIV_ZPmZ_S | sdiv z24.s, p1/m, z24.s, z14.s // SDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+# CHECK-NEXT: 2 | 20 | 20 | 0.05 | V1UnitSVE0[40], V1UnitSVE01[40], V1UnitV[40], V1UnitV02[40] | SDIV_ZPmZ_D | sdiv z7.d, p6/m, z7.d, z20.d // SDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+# CHECK-NEXT: 2 | 12 | 12 | 0.09 | V1UnitSVE0[22], V1UnitSVE01[22], V1UnitV[22], V1UnitV02[22] | SDIVR_ZPmZ_S | sdivr z10.s, p2/m, z10.s, z7.s // SDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+# CHECK-NEXT: 2 | 20 | 20 | 0.05 | V1UnitSVE0[40], V1UnitSVE01[40], V1UnitV[40], V1UnitV02[40] | SDIVR_ZPmZ_D | sdivr z0.d, p3/m, z0.d, z9.d // SDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+# CHECK-NEXT: 2 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SDOT_ZZZI_S | sdot z6.s, z29.b, z0.b[2] // SDOT <Zda>.S, <Zn>.B, <Zmb>.B[<imms>] \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 1 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SDOT_ZZZI_D | sdot z0.d, z18.h, z10.h[1] // SDOT <Zda>.D, <Zn>.H, <Zmh>.H[<immd>] \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SDOT_ZZZ_S | sdot z28.s, z30.b, z14.b // SDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 1 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SDOT_ZZZ_D | sdot z19.d, z5.h, z8.h // SDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | SDOTlanev16i8 | sdot v2.4s, v27.16b, v5.4b[0] // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<indexs>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | SDOTv8i8 | sdot v3.2s, v20.8b, v10.8b // SDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SEL_PPPP | sel p1.b, p7, p5.b, p4.b // SEL <Pd>.B, <Pg>, <Pn>.B, <Pm>.B \\ Predicate select \\ 1 1 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SEL_ZPZZ_H | sel z0.h, p7, z13.h, z13.h // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SEL_ZPZZ_H | sel z0.h, p7, z13.h, z13.h // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T> \\ Select, vector form \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SETFFR | setffr // SETFFR \\ Set first fault register \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | sev // SEV \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | sevl // SEVL \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHADDv16i8 | shadd v25.16b, v1.16b, v10.16b // SHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLd | shl d17, d3, #16 // SHL <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv8i8_shift | shl v23.8b, v18.8b, #6 // SHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv8i16_shift | shl v0.8h, v23.8h, #10 // SHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv4i32_shift | shl v0.4s, v18.4s, #30 // SHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLv2i64_shift | shl v20.2d, v28.2d, #40 // SHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv8i8 | shll v3.8h, v13.8b, #8 // SHLL <Vd>.8H, <Vn>.8B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv4i16 | shll v26.4s, v18.4h, #16 // SHLL <Vd>.4S, <Vn>.4H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv2i32 | shll v4.2d, v25.2s, #32 // SHLL <Vd>.2D, <Vn>.2S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv16i8 | shll2 v12.8h, v28.16b, #8 // SHLL2 <Vd>.8H, <Vn>.16B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv8i16 | shll2 v11.4s, v22.8h, #16 // SHLL2 <Vd>.4S, <Vn>.8H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHLLv4i32 | shll2 v2.2d, v29.4s, #32 // SHLL2 <Vd>.2D, <Vn>.4S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv8i8_shift | shrn v27.8b, v23.8h, #3 // SHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv4i16_shift | shrn v17.4h, v1.4s, #13 // SHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv2i32_shift | shrn v13.2s, v0.2d, #12 // SHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv16i8_shift | shrn2 v4.16b, v29.8h, #8 // SHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv8i16_shift | shrn2 v9.8h, v18.4s, #10 // SHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SHRNv4i32_shift | shrn2 v5.4s, v12.2d, #16 // SHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SHSUBv8i16 | shsub v15.8h, v5.8h, v27.8h // SHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLId | sli d7, d19, #53 // SLI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv16i8_shift | sli v16.16b, v26.16b, #7 // SLI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv4i16_shift | sli v14.4h, v10.4h, #15 // SLI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv2i32_shift | sli v29.2s, v14.2s, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SLIv2i64_shift | sli v25.2d, v21.2d, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SHADDv16i8 | shadd v25.16b, v1.16b, v10.16b // SHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLd | shl d17, d3, #16 // SHL <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLv8i8_shift | shl v23.8b, v18.8b, #6 // SHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLv8i16_shift | shl v0.8h, v23.8h, #10 // SHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLv4i32_shift | shl v0.4s, v18.4s, #30 // SHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLv2i64_shift | shl v20.2d, v28.2d, #40 // SHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLLv8i8 | shll v3.8h, v13.8b, #8 // SHLL <Vd>.8H, <Vn>.8B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLLv4i16 | shll v26.4s, v18.4h, #16 // SHLL <Vd>.4S, <Vn>.4H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLLv2i32 | shll v4.2d, v25.2s, #32 // SHLL <Vd>.2D, <Vn>.2S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLLv16i8 | shll2 v12.8h, v28.16b, #8 // SHLL2 <Vd>.8H, <Vn>.16B, #8 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLLv8i16 | shll2 v11.4s, v22.8h, #16 // SHLL2 <Vd>.4S, <Vn>.8H, #16 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHLLv4i32 | shll2 v2.2d, v29.4s, #32 // SHLL2 <Vd>.2D, <Vn>.4S, #32 \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHRNv8i8_shift | shrn v27.8b, v23.8h, #3 // SHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHRNv4i16_shift | shrn v17.4h, v1.4s, #13 // SHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHRNv2i32_shift | shrn v13.2s, v0.2d, #12 // SHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHRNv16i8_shift | shrn2 v4.16b, v29.8h, #8 // SHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHRNv8i16_shift | shrn2 v9.8h, v18.4s, #10 // SHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SHRNv4i32_shift | shrn2 v5.4s, v12.2d, #16 // SHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SHSUBv8i16 | shsub v15.8h, v5.8h, v27.8h // SHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SLId | sli d7, d19, #53 // SLI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SLIv16i8_shift | sli v16.16b, v26.16b, #7 // SLI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SLIv4i16_shift | sli v14.4h, v10.4h, #15 // SLI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SLIv2i32_shift | sli v29.2s, v14.2s, #13 // SLI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SLIv2i64_shift | sli v25.2d, v21.2d, #41 // SLI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SMADDLrrr | smaddl x17, w27, w30, x3 // SMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMAX_ZI_S | smax z3.s, z3.s, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMAX_ZPmZ_B | smax z0.b, p5/m, z0.b, z20.b // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMAXv16i8 | smax v30.16b, v3.16b, v30.16b // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMAXPv8i16 | smaxp v21.8h, v16.8h, v7.8h // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv8i8v | smaxv b4, v30.8b // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SMAXVv16i8v | smaxv b15, v16.16b // SMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv4i16v | smaxv h28, v14.4h // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv8i16v | smaxv h6, v19.8h // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMAXVv4i32v | smaxv s3, v14.4s // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_B | smaxv b19, p4, z14.b // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_H | smaxv h0, p6, z20.h // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_S | smaxv s11, p2, z28.s // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMAXV_VPZ_D | smaxv d24, p5, z24.d // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SMAX_ZI_S | smax z3.s, z3.s, #-39 // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SMAX_ZPmZ_B | smax z0.b, p5/m, z0.b, z20.b // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SMAXv16i8 | smax v30.16b, v3.16b, v30.16b // SMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SMAXPv8i16 | smaxp v21.8h, v16.8h, v7.8h // SMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMAXVv8i8v | smaxv b4, v30.8b // SMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | SMAXVv16i8v | smaxv b15, v16.16b // SMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMAXVv4i16v | smaxv h28, v14.4h // SMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMAXVv8i16v | smaxv h6, v19.8h // SMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMAXVv4i32v | smaxv s3, v14.4s // SMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 14 | 14 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMAXV_VPZ_B | smaxv b19, p4, z14.b // SMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMAXV_VPZ_H | smaxv h0, p6, z20.h // SMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMAXV_VPZ_S | smaxv s11, p2, z28.s // SMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMAXV_VPZ_D | smaxv d24, p5, z24.d // SMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SMC | smc #0x7e57 // SMC #<imm> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMIN_ZI_S | smin z21.s, z21.s, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SMIN_ZPmZ_S | smin z22.s, p0/m, z22.s, z30.s // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMINv4i32 | smin v29.4s, v24.4s, v24.4s // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SMINPv8i16 | sminp v7.8h, v27.8h, v7.8h // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMINVv8i8v | sminv b6, v11.8b // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | SMINVv16i8v | sminv b24, v8.16b // SMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMINVv4i16v | sminv h24, v23.4h // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SMINVv8i16v | sminv h2, v9.8h // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SMINVv4i32v | sminv s16, v15.4s // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_B | sminv b4, p2, z10.b // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_H | sminv h15, p7, z10.h // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_S | sminv s29, p0, z27.s // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | SMINV_VPZ_D | sminv d17, p2, z18.d // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv4i16_indexed | smlal v16.4s, v9.4h, v11.h[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv2i32_indexed | smlal v0.2d, v25.2s, v1.s[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i16_indexed | smlal2 v1.4s, v9.8h, v0.h[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv4i32_indexed | smlal2 v30.2d, v22.4s, v7.s[2] // SMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i8_v8i16 | smlal v25.8h, v24.8b, v28.8b // SMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLALv8i16_v4i32 | smlal2 v30.4s, v31.8h, v13.8h // SMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i16_indexed | smlsl v14.4s, v23.4h, v12.h[7] // SMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv2i32_indexed | smlsl v25.2d, v27.2s, v1.s[1] // SMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv8i16_indexed | smlsl2 v12.4s, v11.8h, v12.h[0] // SMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i32_indexed | smlsl2 v11.2d, v28.4s, v7.s[2] // SMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv4i16_v4i32 | smlsl v11.4s, v14.4h, v15.4h // SMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | SMLSLv8i16_v4i32 | smlsl2 v21.4s, v27.8h, v16.8h // SMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SMMLA | smmla v0.4s, v17.16b, v31.16b // SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SMIN_ZI_S | smin z21.s, z21.s, #59 // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SMIN_ZPmZ_S | smin z22.s, p0/m, z22.s, z30.s // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SMINv4i32 | smin v29.4s, v24.4s, v24.4s // SMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SMINPv8i16 | sminp v7.8h, v27.8h, v7.8h // SMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMINVv8i8v | sminv b6, v11.8b // SMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | SMINVv16i8v | sminv b24, v8.16b // SMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMINVv4i16v | sminv h24, v23.4h // SMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMINVv8i16v | sminv h2, v9.8h // SMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SMINVv4i32v | sminv s16, v15.4s // SMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 14 | 14 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMINV_VPZ_B | sminv b4, p2, z10.b // SMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMINV_VPZ_H | sminv h15, p7, z10.h // SMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMINV_VPZ_S | sminv s29, p0, z27.s // SMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | SMINV_VPZ_D | sminv d17, p2, z18.d // SMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLALv4i16_indexed | smlal v16.4s, v9.4h, v11.h[4] // SMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLALv2i32_indexed | smlal v0.2d, v25.2s, v1.s[1] // SMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLALv8i16_indexed | smlal2 v1.4s, v9.8h, v0.h[6] // SMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLALv4i32_indexed | smlal2 v30.2d, v22.4s, v7.s[2] // SMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLALv8i8_v8i16 | smlal v25.8h, v24.8b, v28.8b // SMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLALv8i16_v4i32 | smlal2 v30.4s, v31.8h, v13.8h // SMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLSLv4i16_indexed | smlsl v14.4s, v23.4h, v12.h[7] // SMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLSLv2i32_indexed | smlsl v25.2d, v27.2s, v1.s[1] // SMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLSLv8i16_indexed | smlsl2 v12.4s, v11.8h, v12.h[0] // SMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLSLv4i32_indexed | smlsl2 v11.2d, v28.4s, v7.s[2] // SMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLSLv4i16_v4i32 | smlsl v11.4s, v14.4h, v15.4h // SMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMLSLv8i16_v4i32 | smlsl2 v21.4s, v27.8h, v16.8h // SMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | SMMLA | smmla v0.4s, v17.16b, v31.16b // SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SMSUBLrrr | smnegl x3, w23, w18 // SMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to32_idx0 | smov w15, v22.b[0] // SMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to32 | smov w6, v28.b[9] // SMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to32_idx0 | smov w26, v27.h[0] // SMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to32 | smov w18, v29.h[6] // SMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to64_idx0 | smov x21, v0.b[0] // SMOV <Xd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi8to64 | smov x16, v29.b[8] // SMOV <Xd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to64_idx0 | smov x9, v27.h[0] // SMOV <Xd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi16to64 | smov x4, v21.h[2] // SMOV <Xd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi32to64_idx0 | smov x15, v3.s[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | SMOVvi32to64 | smov x5, v29.s[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi8to32_idx0 | smov w15, v22.b[0] // SMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi8to32 | smov w6, v28.b[9] // SMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi16to32_idx0 | smov w26, v27.h[0] // SMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi16to32 | smov w18, v29.h[6] // SMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi8to64_idx0 | smov x21, v0.b[0] // SMOV <Xd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi8to64 | smov x16, v29.b[8] // SMOV <Xd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi16to64_idx0 | smov x9, v27.h[0] // SMOV <Xd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi16to64 | smov x4, v21.h[2] // SMOV <Xd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi32to64_idx0 | smov x15, v3.s[0] // SMOV <Xd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | SMOVvi32to64 | smov x5, v29.s[1] // SMOV <Xd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SMSUBLrrr | smsubl x8, w24, w13, x6 // SMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_B | smulh z11.b, p5/m, z11.b, z17.b // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_H | smulh z8.h, p0/m, z8.h, z4.h // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | SMULH_ZPmZ_S | smulh z27.s, p7/m, z27.s, z30.s // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | SMULH_ZPmZ_D | smulh z4.d, p7/m, z4.d, z28.d // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SMULH_ZPmZ_B | smulh z11.b, p5/m, z11.b, z17.b // SMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SMULH_ZPmZ_H | smulh z8.h, p0/m, z8.h, z4.h // SMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | SMULH_ZPmZ_S | smulh z27.s, p7/m, z27.s, z30.s // SMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | SMULH_ZPmZ_D | smulh z4.d, p7/m, z4.d, z28.d // SMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | SMULHrr | smulh x8, x29, x17 // SMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SMADDLrrr | smull x19, w0, w6 // SMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i16_indexed | smull v3.4s, v26.4h, v1.h[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv2i32_indexed | smull v31.2d, v23.2s, v6.s[2] // SMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv8i16_indexed | smull2 v13.4s, v18.8h, v0.h[3] // SMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i32_indexed | smull2 v11.2d, v1.4s, v7.s[0] // SMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv2i32_v2i64 | smull v28.2d, v26.2s, v20.2s // SMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SMULLv4i32_v2i64 | smull2 v7.2d, v14.4s, v15.4s // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQABSv1i64 | sqabs d15, d26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQABSv8i16 | sqabs v25.8h, v24.8h // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_B | sqadd z1.b, z1.b, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_H | sqadd z18.h, z18.h, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZI_D | sqadd z3.d, z3.d, #158 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQADD_ZZZ_D | sqadd z19.d, z27.d, z28.d // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQADDv1i16 | sqadd h12, h18, h10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQADDv2i32 | sqadd v15.2s, v13.2s, v28.2s // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMULLv4i16_indexed | smull v3.4s, v26.4h, v1.h[5] // SMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMULLv2i32_indexed | smull v31.2d, v23.2s, v6.s[2] // SMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMULLv8i16_indexed | smull2 v13.4s, v18.8h, v0.h[3] // SMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMULLv4i32_indexed | smull2 v11.2d, v1.4s, v7.s[0] // SMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMULLv2i32_v2i64 | smull v28.2d, v26.2s, v20.2s // SMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SMULLv4i32_v2i64 | smull2 v7.2d, v14.4s, v15.4s // SMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQABSv1i64 | sqabs d15, d26 // SQABS <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQABSv8i16 | sqabs v25.8h, v24.8h // SQABS <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQADD_ZI_B | sqadd z1.b, z1.b, #164 // SQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQADD_ZI_H | sqadd z18.h, z18.h, #166 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQADD_ZI_D | sqadd z3.d, z3.d, #158 // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQADD_ZZZ_D | sqadd z19.d, z27.d, z28.d // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQADDv1i16 | sqadd h12, h18, h10 // SQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQADDv2i32 | sqadd v15.2s, v13.2s, v28.2s // SQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x26, w26 // SQDECB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x16, w16, vl64 // SQDECB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECB_XPiWdI | sqdecb x4, w4, vl1, mul #16 // SQDECB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -6447,63 +6450,63 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x18 // SQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x11, vl5 // SQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECD_XPiI | sqdecd x21, all, mul #13 // SQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECD_ZPiI | sqdecd z27.d // SQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECD_ZPiI | sqdecd z2.d, vl128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECD_ZPiI | sqdecd z23.d, vl1, mul #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECD_ZPiI | sqdecd z27.d // SQDECD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECD_ZPiI | sqdecd z2.d, vl128 // SQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECD_ZPiI | sqdecd z23.d, vl1, mul #16 // SQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x7, w7 // SQDECH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x10, w10, vl128 // SQDECH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiWdI | sqdech x16, w16, vl6, mul #11 // SQDECH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x6 // SQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x17, vl128 // SQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECH_XPiI | sqdech x27, vl128, mul #4 // SQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECH_ZPiI | sqdech z16.h // SQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECH_ZPiI | sqdech z21.h, vl6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECH_ZPiI | sqdech z7.h, mul3, mul #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECH_ZPiI | sqdech z16.h // SQDECH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECH_ZPiI | sqdech z21.h, vl6 // SQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECH_ZPiI | sqdech z7.h, mul3, mul #7 // SQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECP_XPWd_B | sqdecp x1, p4.b, w1 // SQDECP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECP_XP_D | sqdecp x26, p6.d // SQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | SQDECP_ZP_D | sqdecp z10.d, p3.d // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE01[2], V1UnitV[2] | SQDECP_ZP_D | sqdecp z10.d, p3.d // SQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x13, w13 // SQDECW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x2, w2, pow2 // SQDECW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiWdI | sqdecw x26, w26, vl8, mul #10 // SQDECW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x10 // SQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x17, vl128 // SQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQDECW_XPiI | sqdecw x13, mul4, mul #3 // SQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECW_ZPiI | sqdecw z7.s // SQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECW_ZPiI | sqdecw z10.s, pow2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQDECW_ZPiI | sqdecw z28.s, vl2, mul #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv1i32_indexed | sqdmlal s23, h16, v4.h[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv1i64_indexed | sqdmlal d12, s18, v3.s[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i16_indexed | sqdmlal v20.4s, v30.4h, v12.h[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv2i32_indexed | sqdmlal v11.2d, v24.2s, v0.s[3] // SQDMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv8i16_indexed | sqdmlal2 v2.4s, v17.8h, v5.h[6] // SQDMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i32_indexed | sqdmlal2 v23.2d, v30.4s, v6.s[0] // SQDMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALi32 | sqdmlal d16, s12, s15 // SQDMLAL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv4i16_v4i32 | sqdmlal v8.4s, v24.4h, v31.4h // SQDMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLALv8i16_v4i32 | sqdmlal2 v29.4s, v11.8h, v13.8h // SQDMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv1i32_indexed | sqdmlsl s26, h21, v11.h[1] // SQDMLSL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv1i64_indexed | sqdmlsl d6, s16, v3.s[1] // SQDMLSL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i16_indexed | sqdmlsl v4.4s, v22.4h, v13.h[2] // SQDMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv2i32_indexed | sqdmlsl v26.2d, v7.2s, v3.s[0] // SQDMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv8i16_indexed | sqdmlsl2 v2.4s, v28.8h, v4.h[6] // SQDMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i32_indexed | sqdmlsl2 v4.2d, v3.4s, v3.s[2] // SQDMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLi32 | sqdmlsl d13, s21, s8 // SQDMLSL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv4i16_v4i32 | sqdmlsl v11.4s, v19.4h, v5.4h // SQDMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMLSLv8i16_v4i32 | sqdmlsl2 v27.4s, v8.8h, v22.8h // SQDMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i16_indexed | sqdmulh h14, h17, v6.h[6] // SQDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i32_indexed | sqdmulh s19, s6, v6.s[3] // SQDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv4i16_indexed | sqdmulh v8.4h, v16.4h, v5.h[4] // SQDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv2i32_indexed | sqdmulh v16.2s, v24.2s, v7.s[2] // SQDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv1i16 | sqdmulh h26, h21, h17 // SQDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQDMULHv2i32 | sqdmulh v20.2s, v11.2s, v29.2s // SQDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv1i32_indexed | sqdmull s25, h5, v1.h[3] // SQDMULL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv1i64_indexed | sqdmull d29, s23, v0.s[2] // SQDMULL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv4i16_indexed | sqdmull v8.4s, v19.4h, v1.h[2] // SQDMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv2i32_indexed | sqdmull v20.2d, v10.2s, v6.s[2] // SQDMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv8i16_indexed | sqdmull2 v10.4s, v25.8h, v0.h[7] // SQDMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv4i32_indexed | sqdmull2 v4.2d, v29.4s, v2.s[3] // SQDMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLi32 | sqdmull d19, s2, s0 // SQDMULL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv2i32_v2i64 | sqdmull v14.2d, v23.2s, v13.2s // SQDMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | SQDMULLv8i16_v4i32 | sqdmull2 v12.4s, v11.8h, v1.8h // SQDMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECW_ZPiI | sqdecw z7.s // SQDECW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECW_ZPiI | sqdecw z10.s, pow2 // SQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQDECW_ZPiI | sqdecw z28.s, vl2, mul #15 // SQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv1i32_indexed | sqdmlal s23, h16, v4.h[7] // SQDMLAL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv1i64_indexed | sqdmlal d12, s18, v3.s[0] // SQDMLAL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv4i16_indexed | sqdmlal v20.4s, v30.4h, v12.h[3] // SQDMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv2i32_indexed | sqdmlal v11.2d, v24.2s, v0.s[3] // SQDMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv8i16_indexed | sqdmlal2 v2.4s, v17.8h, v5.h[6] // SQDMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv4i32_indexed | sqdmlal2 v23.2d, v30.4s, v6.s[0] // SQDMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALi32 | sqdmlal d16, s12, s15 // SQDMLAL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv4i16_v4i32 | sqdmlal v8.4s, v24.4h, v31.4h // SQDMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLALv8i16_v4i32 | sqdmlal2 v29.4s, v11.8h, v13.8h // SQDMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv1i32_indexed | sqdmlsl s26, h21, v11.h[1] // SQDMLSL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv1i64_indexed | sqdmlsl d6, s16, v3.s[1] // SQDMLSL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv4i16_indexed | sqdmlsl v4.4s, v22.4h, v13.h[2] // SQDMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv2i32_indexed | sqdmlsl v26.2d, v7.2s, v3.s[0] // SQDMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv8i16_indexed | sqdmlsl2 v2.4s, v28.8h, v4.h[6] // SQDMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv4i32_indexed | sqdmlsl2 v4.2d, v3.4s, v3.s[2] // SQDMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLi32 | sqdmlsl d13, s21, s8 // SQDMLSL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv4i16_v4i32 | sqdmlsl v11.4s, v19.4h, v5.4h // SQDMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMLSLv8i16_v4i32 | sqdmlsl2 v27.4s, v8.8h, v22.8h // SQDMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate saturating long \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULHv1i16_indexed | sqdmulh h14, h17, v6.h[6] // SQDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULHv1i32_indexed | sqdmulh s19, s6, v6.s[3] // SQDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULHv4i16_indexed | sqdmulh v8.4h, v16.4h, v5.h[4] // SQDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULHv2i32_indexed | sqdmulh v16.2s, v24.2s, v7.s[2] // SQDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULHv1i16 | sqdmulh h26, h21, h17 // SQDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULHv2i32 | sqdmulh v20.2s, v11.2s, v29.2s // SQDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv1i32_indexed | sqdmull s25, h5, v1.h[3] // SQDMULL S<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv1i64_indexed | sqdmull d29, s23, v0.s[2] // SQDMULL D<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv4i16_indexed | sqdmull v8.4s, v19.4h, v1.h[2] // SQDMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv2i32_indexed | sqdmull v20.2d, v10.2s, v6.s[2] // SQDMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv8i16_indexed | sqdmull2 v10.4s, v25.8h, v0.h[7] // SQDMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv4i32_indexed | sqdmull2 v4.2d, v29.4s, v2.s[3] // SQDMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLi32 | sqdmull d19, s2, s0 // SQDMULL <Va><d>, <Vb><n>, <Vb><m> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv2i32_v2i64 | sqdmull v14.2d, v23.2s, v13.2s // SQDMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQDMULLv8i16_v4i32 | sqdmull2 v12.4s, v11.8h, v1.8h // SQDMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x12, w12 // SQINCB <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x1, w1, vl8 // SQINCB <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCB_XPiWdI | sqincb x16, w16, vl2, mul #16 // SQINCB <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -6516,443 +6519,443 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x10 // SQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x17, vl5 // SQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCD_XPiI | sqincd x13, vl64 // SQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCD_ZPiI | sqincd z24.d // SQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCD_ZPiI | sqincd z10.d, vl128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCD_ZPiI | sqincd z29.d, vl128, mul #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCD_ZPiI | sqincd z24.d // SQINCD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCD_ZPiI | sqincd z10.d, vl128 // SQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCD_ZPiI | sqincd z29.d, vl128, mul #12 // SQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x28, w28 // SQINCH <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x30, w30, vl1 // SQINCH <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiWdI | sqinch x16, w16, vl4, mul #2 // SQINCH <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x23 // SQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x10, vl64 // SQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCH_XPiI | sqinch x16, pow2, mul #2 // SQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCH_ZPiI | sqinch z3.h // SQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCH_ZPiI | sqinch z23.h, vl4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCH_ZPiI | sqinch z6.h, vl128, mul #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCH_ZPiI | sqinch z3.h // SQINCH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCH_ZPiI | sqinch z23.h, vl4 // SQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCH_ZPiI | sqinch z6.h, vl128, mul #3 // SQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCP_XPWd_H | sqincp x13, p2.h, w13 // SQINCP <Xdn>, <Pm>.<T>, <Wdn> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCP_XP_H | sqincp x0, p7.h // SQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | SQINCP_ZP_H | sqincp z9.h, p1.h // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE01[2], V1UnitV[2] | SQINCP_ZP_H | sqincp z9.h, p1.h // SQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x24, w24 // SQINCW <Xdn>, <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x16, w16, mul4 // SQINCW <Xdn>, <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiWdI | sqincw x27, w27, vl32, mul #15 // SQINCW <Xdn>, <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x29 // SQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x25, vl7 // SQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | SQINCW_XPiI | sqincw x21, vl8, mul #3 // SQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCW_ZPiI | sqincw z30.s // SQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCW_ZPiI | sqincw z8.s, mul3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQINCW_ZPiI | sqincw z0.s, vl5, mul #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQNEGv1i64 | sqneg d24, d22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQNEGv16i8 | sqneg v30.16b, v15.16b // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i16_indexed | sqrdmlah h14, h4, v6.h[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i32_indexed | sqrdmlah s24, s17, v6.s[2] // SQRDMLAH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv4i16_indexed | sqrdmlah v17.4h, v18.4h, v4.h[7] // SQRDMLAH <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv2i32_indexed | sqrdmlah v10.2s, v17.2s, v3.s[3] // SQRDMLAH <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv1i32 | sqrdmlah s3, s3, s5 // SQRDMLAH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLAHv8i16 | sqrdmlah v16.8h, v30.8h, v28.8h // SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i16_indexed | sqrdmlsh h13, h26, v4.h[2] // SQRDMLSH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i32_indexed | sqrdmlsh s26, s29, v7.s[0] // SQRDMLSH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv8i16_indexed | sqrdmlsh v1.8h, v21.8h, v8.h[1] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i16_indexed | sqrdmlsh v8.4h, v11.4h, v1.h[3] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv2i32_indexed | sqrdmlsh v20.2s, v29.2s, v4.s[3] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i32_indexed | sqrdmlsh v21.4s, v9.4s, v1.s[0] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv1i32 | sqrdmlsh s30, s20, s13 // SQRDMLSH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMLSHv4i16 | sqrdmlsh v20.4h, v2.4h, v23.4h // SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i16_indexed | sqrdmulh h3, h25, v2.h[1] // SQRDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i32_indexed | sqrdmulh s9, s24, v4.s[3] // SQRDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv8i16_indexed | sqrdmulh v0.8h, v15.8h, v0.h[5] // SQRDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv2i32_indexed | sqrdmulh v6.2s, v29.2s, v4.s[2] // SQRDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv1i16 | sqrdmulh h5, h2, h20 // SQRDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV02 | SQRDMULHv2i32 | sqrdmulh v31.2s, v17.2s, v4.2s // SQRDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHLv1i64 | sqrshl d6, d1, d30 // SQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHLv8i8 | sqrshl v15.8b, v26.8b, v21.8b // SQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNb | sqrshrn b6, h24, #3 // SQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNh | sqrshrn h11, s22, #8 // SQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNs | sqrshrn s4, d9, #13 // SQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv8i8_shift | sqrshrn v31.8b, v31.8h, #2 // SQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv4i16_shift | sqrshrn v27.4h, v11.4s, #8 // SQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv2i32_shift | sqrshrn v4.2s, v30.2d, #10 // SQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv16i8_shift | sqrshrn2 v11.16b, v30.8h, #7 // SQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv8i16_shift | sqrshrn2 v14.8h, v3.4s, #12 // SQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRNv4i32_shift | sqrshrn2 v13.4s, v28.2d, #24 // SQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNb | sqrshrun b5, h0, #3 // SQRSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNh | sqrshrun h25, s11, #7 // SQRSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNs | sqrshrun s15, d18, #2 // SQRSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv8i8_shift | sqrshrun v0.8b, v3.8h, #7 // SQRSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv4i16_shift | sqrshrun v5.4h, v8.4s, #7 // SQRSHRUN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv2i32_shift | sqrshrun v7.2s, v8.2d, #13 // SQRSHRUN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv16i8_shift | sqrshrun2 v14.16b, v14.8h, #3 // SQRSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv8i16_shift | sqrshrun2 v9.8h, v16.4s, #10 // SQRSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQRSHRUNv4i32_shift | sqrshrun2 v12.4s, v23.2d, #30 // SQRSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLb | sqshl b15, b3, #4 // SQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLh | sqshl h21, h0, #5 // SQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLs | sqshl s26, s9, #24 // SQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLd | sqshl d8, d23, #17 // SQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv16i8_shift | sqshl v25.16b, v26.16b, #5 // SQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv4i16_shift | sqshl v29.4h, v1.4h, #7 // SQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv2i32_shift | sqshl v0.2s, v5.2s, #1 // SQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv2i64_shift | sqshl v11.2d, v2.2d, #23 // SQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv1i32 | sqshl s17, s4, s23 // SQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLv16i8 | sqshl v23.16b, v23.16b, v23.16b // SQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUb | sqshlu b3, b27, #5 // SQSHLU B<d>, B<n>, #<shiftb> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUh | sqshlu h23, h4, #6 // SQSHLU H<d>, H<n>, #<shifth> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUs | sqshlu s29, s29, #30 // SQSHLU S<d>, S<n>, #<shifts> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUd | sqshlu d14, d5, #22 // SQSHLU D<d>, D<n>, #<shiftd> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv8i8_shift | sqshlu v11.8b, v17.8b, #6 // SQSHLU <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv8i16_shift | sqshlu v18.8h, v8.8h, #14 // SQSHLU <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv4i32_shift | sqshlu v25.4s, v7.4s, #13 // SQSHLU <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHLUv2i64_shift | sqshlu v19.2d, v14.2d, #39 // SQSHLU <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNb | sqshrn b17, h30, #7 // SQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNh | sqshrn h30, s15, #5 // SQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNs | sqshrn s16, d0, #20 // SQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv8i8_shift | sqshrn v3.8b, v25.8h, #1 // SQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv4i16_shift | sqshrn v23.4h, v14.4s, #6 // SQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv2i32_shift | sqshrn v6.2s, v29.2d, #10 // SQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv16i8_shift | sqshrn2 v31.16b, v31.8h, #8 // SQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv8i16_shift | sqshrn2 v13.8h, v6.4s, #13 // SQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRNv4i32_shift | sqshrn2 v30.4s, v0.2d, #1 // SQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNb | sqshrun b3, h16, #3 // SQSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNh | sqshrun h11, s10, #7 // SQSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNs | sqshrun s18, d1, #13 // SQSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv8i8_shift | sqshrun v21.8b, v27.8h, #5 // SQSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv4i16_shift | sqshrun v18.4h, v19.4s, #2 // SQSHRUN <Vd>.4H, <Vn>.4S, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv2i32_shift | sqshrun v2.2s, v14.2d, #3 // SQSHRUN <Vd>.2S, <Vn>.2D, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv16i8_shift | sqshrun2 v10.16b, v28.8h, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv8i16_shift | sqshrun2 v4.8h, v28.4s, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQSHRUNv4i32_shift | sqshrun2 v7.4s, v18.2d, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_B | sqsub z13.b, z13.b, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_H | sqsub z28.h, z28.h, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZI_S | sqsub z11.s, z11.s, #14 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SQSUB_ZZZ_S | sqsub z28.s, z9.s, z12.s // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSUBv1i8 | sqsub b3, b13, b12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SQSUBv8i16 | sqsub v20.8h, v18.8h, v12.8h // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv1i8 | sqxtn b11, h22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv2i32 | sqxtn v3.2s, v17.2d // SQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTNv8i16 | sqxtn2 v17.8h, v27.4s // SQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv1i8 | sqxtun b30, h18 // SQXTUN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv8i8 | sqxtun v26.8b, v21.8h // SQXTUN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SQXTUNv16i8 | sqxtun2 v22.16b, v6.8h // SQXTUN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SRHADDv8i8 | srhadd v29.8b, v3.8b, v8.8b // SRHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRId | sri d30, d17, #61 // SRI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv16i8_shift | sri v23.16b, v30.16b, #2 // SRI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv4i16_shift | sri v1.4h, v0.4h, #4 // SRI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv2i32_shift | sri v28.2s, v6.2s, #16 // SRI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SRIv2i64_shift | sri v8.2d, v19.2d, #40 // SRI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHLv1i64 | srshl d30, d8, d8 // SRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHLv8i8 | srshl v20.8b, v23.8b, v27.8b // SRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRd | srshr d20, d18, #27 // SRSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv8i8_shift | srshr v20.8b, v0.8b, #7 // SRSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv8i16_shift | srshr v27.8h, v19.8h, #9 // SRSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv2i32_shift | srshr v8.2s, v20.2s, #31 // SRSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | SRSHRv2i64_shift | srshr v31.2d, v17.2d, #33 // SRSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAd | srsra d13, d10, #25 // SRSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv16i8_shift | srsra v31.16b, v15.16b, #5 // SRSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv4i16_shift | srsra v14.4h, v27.4h, #7 // SRSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv2i32_shift | srsra v17.2s, v8.2s, #8 // SRSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SRSRAv2i64_shift | srsra v22.2d, v4.2d, #12 // SRSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCW_ZPiI | sqincw z30.s // SQINCW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCW_ZPiI | sqincw z8.s, mul3 // SQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQINCW_ZPiI | sqincw z0.s, vl5, mul #9 // SQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQNEGv1i64 | sqneg d24, d22 // SQNEG <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQNEGv16i8 | sqneg v30.16b, v15.16b // SQNEG <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLAHv1i16_indexed | sqrdmlah h14, h4, v6.h[7] // SQRDMLAH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLAHv1i32_indexed | sqrdmlah s24, s17, v6.s[2] // SQRDMLAH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLAHv4i16_indexed | sqrdmlah v17.4h, v18.4h, v4.h[7] // SQRDMLAH <Vd>.4H, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLAHv2i32_indexed | sqrdmlah v10.2s, v17.2s, v3.s[3] // SQRDMLAH <Vd>.2S, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLAHv1i32 | sqrdmlah s3, s3, s5 // SQRDMLAH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLAHv8i16 | sqrdmlah v16.8h, v30.8h, v28.8h // SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv1i16_indexed | sqrdmlsh h13, h26, v4.h[2] // SQRDMLSH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv1i32_indexed | sqrdmlsh s26, s29, v7.s[0] // SQRDMLSH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv8i16_indexed | sqrdmlsh v1.8h, v21.8h, v8.h[1] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv4i16_indexed | sqrdmlsh v8.4h, v11.4h, v1.h[3] // SQRDMLSH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv2i32_indexed | sqrdmlsh v20.2s, v29.2s, v4.s[3] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv4i32_indexed | sqrdmlsh v21.4s, v9.4s, v1.s[0] // SQRDMLSH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv1i32 | sqrdmlsh s30, s20, s13 // SQRDMLSH <V><d>, <V><n>, <V><m> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMLSHv4i16 | sqrdmlsh v20.4h, v2.4h, v23.4h // SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply accumulate high \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMULHv1i16_indexed | sqrdmulh h3, h25, v2.h[1] // SQRDMULH H<d>, H<n>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMULHv1i32_indexed | sqrdmulh s9, s24, v4.s[3] // SQRDMULH S<d>, S<n>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMULHv8i16_indexed | sqrdmulh v0.8h, v15.8h, v0.h[5] // SQRDMULH <Vd>.<Th>, <Vn>.<Th>, <Vmh>.H[<indexh>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMULHv2i32_indexed | sqrdmulh v6.2s, v29.2s, v4.s[2] // SQRDMULH <Vd>.<Ts>, <Vn>.<Ts>, <Vms>.S[<indexs>] \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMULHv1i16 | sqrdmulh h5, h2, h20 // SQRDMULH <V><d>, <V><n>, <V><m> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | SQRDMULHv2i32 | sqrdmulh v31.2s, v17.2s, v4.2s // SQRDMULH <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD multiply \\ 1 4 4 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHLv1i64 | sqrshl d6, d1, d30 // SQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHLv8i8 | sqrshl v15.8b, v26.8b, v21.8b // SQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNb | sqrshrn b6, h24, #3 // SQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNh | sqrshrn h11, s22, #8 // SQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNs | sqrshrn s4, d9, #13 // SQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNv8i8_shift | sqrshrn v31.8b, v31.8h, #2 // SQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNv4i16_shift | sqrshrn v27.4h, v11.4s, #8 // SQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNv2i32_shift | sqrshrn v4.2s, v30.2d, #10 // SQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNv16i8_shift | sqrshrn2 v11.16b, v30.8h, #7 // SQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNv8i16_shift | sqrshrn2 v14.8h, v3.4s, #12 // SQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRNv4i32_shift | sqrshrn2 v13.4s, v28.2d, #24 // SQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNb | sqrshrun b5, h0, #3 // SQRSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNh | sqrshrun h25, s11, #7 // SQRSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNs | sqrshrun s15, d18, #2 // SQRSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNv8i8_shift | sqrshrun v0.8b, v3.8h, #7 // SQRSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNv4i16_shift | sqrshrun v5.4h, v8.4s, #7 // SQRSHRUN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNv2i32_shift | sqrshrun v7.2s, v8.2d, #13 // SQRSHRUN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNv16i8_shift | sqrshrun2 v14.16b, v14.8h, #3 // SQRSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNv8i16_shift | sqrshrun2 v9.8h, v16.4s, #10 // SQRSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQRSHRUNv4i32_shift | sqrshrun2 v12.4s, v23.2d, #30 // SQRSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLb | sqshl b15, b3, #4 // SQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLh | sqshl h21, h0, #5 // SQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLs | sqshl s26, s9, #24 // SQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLd | sqshl d8, d23, #17 // SQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLv16i8_shift | sqshl v25.16b, v26.16b, #5 // SQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLv4i16_shift | sqshl v29.4h, v1.4h, #7 // SQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLv2i32_shift | sqshl v0.2s, v5.2s, #1 // SQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLv2i64_shift | sqshl v11.2d, v2.2d, #23 // SQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLv1i32 | sqshl s17, s4, s23 // SQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLv16i8 | sqshl v23.16b, v23.16b, v23.16b // SQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUb | sqshlu b3, b27, #5 // SQSHLU B<d>, B<n>, #<shiftb> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUh | sqshlu h23, h4, #6 // SQSHLU H<d>, H<n>, #<shifth> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUs | sqshlu s29, s29, #30 // SQSHLU S<d>, S<n>, #<shifts> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUd | sqshlu d14, d5, #22 // SQSHLU D<d>, D<n>, #<shiftd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUv8i8_shift | sqshlu v11.8b, v17.8b, #6 // SQSHLU <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUv8i16_shift | sqshlu v18.8h, v8.8h, #14 // SQSHLU <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUv4i32_shift | sqshlu v25.4s, v7.4s, #13 // SQSHLU <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHLUv2i64_shift | sqshlu v19.2d, v14.2d, #39 // SQSHLU <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ No description \\ No scheduling info
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNb | sqshrn b17, h30, #7 // SQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNh | sqshrn h30, s15, #5 // SQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNs | sqshrn s16, d0, #20 // SQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNv8i8_shift | sqshrn v3.8b, v25.8h, #1 // SQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNv4i16_shift | sqshrn v23.4h, v14.4s, #6 // SQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNv2i32_shift | sqshrn v6.2s, v29.2d, #10 // SQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNv16i8_shift | sqshrn2 v31.16b, v31.8h, #8 // SQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNv8i16_shift | sqshrn2 v13.8h, v6.4s, #13 // SQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRNv4i32_shift | sqshrn2 v30.4s, v0.2d, #1 // SQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNb | sqshrun b3, h16, #3 // SQSHRUN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNh | sqshrun h11, s10, #7 // SQSHRUN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNs | sqshrun s18, d1, #13 // SQSHRUN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNv8i8_shift | sqshrun v21.8b, v27.8h, #5 // SQSHRUN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNv4i16_shift | sqshrun v18.4h, v19.4s, #2 // SQSHRUN <Vd>.4H, <Vn>.4S, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNv2i32_shift | sqshrun v2.2s, v14.2d, #3 // SQSHRUN <Vd>.2S, <Vn>.2D, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNv16i8_shift | sqshrun2 v10.16b, v28.8h, #5 // SQSHRUN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNv8i16_shift | sqshrun2 v4.8h, v28.4s, #12 // SQSHRUN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQSHRUNv4i32_shift | sqshrun2 v7.4s, v18.2d, #16 // SQSHRUN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQSUB_ZI_B | sqsub z13.b, z13.b, #114 // SQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQSUB_ZI_H | sqsub z28.h, z28.h, #139 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQSUB_ZI_S | sqsub z11.s, z11.s, #14 // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SQSUB_ZZZ_S | sqsub z28.s, z9.s, z12.s // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQSUBv1i8 | sqsub b3, b13, b12 // SQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SQSUBv8i16 | sqsub v20.8h, v18.8h, v12.8h // SQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQXTNv1i8 | sqxtn b11, h22 // SQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQXTNv2i32 | sqxtn v3.2s, v17.2d // SQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQXTNv8i16 | sqxtn2 v17.8h, v27.4s // SQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQXTUNv1i8 | sqxtun b30, h18 // SQXTUN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQXTUNv8i8 | sqxtun v26.8b, v21.8h // SQXTUN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SQXTUNv16i8 | sqxtun2 v22.16b, v6.8h // SQXTUN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SRHADDv8i8 | srhadd v29.8b, v3.8b, v8.8b // SRHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRId | sri d30, d17, #61 // SRI <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRIv16i8_shift | sri v23.16b, v30.16b, #2 // SRI <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRIv4i16_shift | sri v1.4h, v0.4h, #4 // SRI <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRIv2i32_shift | sri v28.2s, v6.2s, #16 // SRI <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRIv2i64_shift | sri v8.2d, v19.2d, #40 // SRI <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed and insert, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHLv1i64 | srshl d30, d8, d8 // SRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHLv8i8 | srshl v20.8b, v23.8b, v27.8b // SRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHRd | srshr d20, d18, #27 // SRSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHRv8i8_shift | srshr v20.8b, v0.8b, #7 // SRSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHRv8i16_shift | srshr v27.8h, v19.8h, #9 // SRSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHRv2i32_shift | srshr v8.2s, v20.2s, #31 // SRSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSHRv2i64_shift | srshr v31.2d, v17.2d, #33 // SRSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSRAd | srsra d13, d10, #25 // SRSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSRAv16i8_shift | srsra v31.16b, v15.16b, #5 // SRSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSRAv4i16_shift | srsra v14.4h, v27.4h, #7 // SRSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSRAv2i32_shift | srsra v17.2s, v8.2s, #8 // SRSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SRSRAv2i64_shift | srsra v22.2d, v4.2d, #12 // SRSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | DSB | ssbb // SSBB \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLv1i64 | sshl d29, d30, d9 // SSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLv2i64 | sshl v13.2d, v7.2d, v27.2d // SSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v9.8h, v2.8b, #0 // SSHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i16_shift | sshll v12.4s, v3.4h, #4 // SSHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv2i32_shift | sshll v17.2d, v6.2s, #22 // SSHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv16i8_shift | sshll2 v28.8h, v12.16b, #7 // SSHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i16_shift | sshll2 v29.4s, v22.8h, #7 // SSHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v17.2d, v13.4s, #22 // SSHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRd | sshr d3, d18, #10 // SSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv8i8_shift | sshr v20.8b, v28.8b, #2 // SSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv4i16_shift | sshr v20.4h, v23.4h, #10 // SSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv2i32_shift | sshr v13.2s, v23.2s, #2 // SSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHRv2i64_shift | sshr v3.2d, v8.2d, #61 // SSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAd | ssra d28, d30, #51 // SSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv8i8_shift | ssra v9.8b, v18.8b, #2 // SSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv4i16_shift | ssra v21.4h, v24.4h, #3 // SSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv2i32_shift | ssra v28.2s, v17.2s, #6 // SSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | SSRAv2i64_shift | ssra v0.2d, v23.2d, #35 // SSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBLv4i16_v4i32 | ssubl v13.4s, v9.4h, v5.4h // SSUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBLv8i16_v4i32 | ssubl2 v18.4s, v29.8h, v17.8h // SSUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBWv2i32_v2i64 | ssubw v5.2d, v13.2d, v4.2s // SSUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SSUBWv8i16_v4i32 | ssubw2 v4.4s, v26.4s, v31.8h // SSUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b | st1 { v18.8b }, [x15] // ST1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b | st1 { v31.16b }, [x29] // ST1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h | st1 { v19.4h }, [x7] // ST1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h | st1 { v27.8h }, [x17] // ST1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s | st1 { v25.2s }, [x6] // ST1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s | st1 { v22.4s }, [x19] // ST1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d | st1 { v20.1d }, [x10] // ST1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d | st1 { v8.2d }, [x15] // ST1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v16.8b }, [x14], #8 // ST1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v10.16b }, [x8], #16 // ST1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v29.4h }, [x17], #8 // ST1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v14.8h }, [x28], #16 // ST1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v18.2s }, [x20], #8 // ST1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v28.4s }, [x1], #16 // ST1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v17.1d }, [x27], #8 // ST1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v30.2d }, [x4], #16 // ST1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v13.8b }, [x8], x7 // ST1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v4.16b }, [x7], x26 // ST1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v17.4h }, [x10], x4 // ST1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v18.8h }, [x15], x1 // ST1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v6.2s }, [x17], x24 // ST1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v26.4s }, [x20], x29 // ST1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v13.1d }, [x3], x20 // ST1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v15.2d }, [x21], x11 // ST1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b | st1 { v8.8b, v9.8b }, [x18] // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b | st1 { v1.16b, v2.16b }, [x4] // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h | st1 { v22.4h, v23.4h }, [x22] // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h | st1 { v18.8h, v19.8h }, [x2] // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s | st1 { v13.2s, v14.2s }, [x9] // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s | st1 { v15.4s, v16.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d | st1 { v21.1d, v22.1d }, [x29] // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d | st1 { v26.2d, v27.2d }, [x28] // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v23.8b, v24.8b }, [x4], #16 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v15.16b, v16.16b }, [x16], #32 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v7.4h, v8.4h }, [x7], #16 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v8.8h, v9.8h }, [x1], #32 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v23.2s, v24.2s }, [x7], #16 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v8.4s, v9.4s }, [x15], #32 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v14.1d, v15.1d }, [x11], #16 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v12.2d, v13.2d }, [x2], #32 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v3.8b, v4.8b }, [x28], x14 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v19.16b, v20.16b }, [x13], x7 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v28.4h, v29.4h }, [x14], x5 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v9.8h, v10.8h }, [x28], x9 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v10.2s, v11.2s }, [x10], x2 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v13.4s, v14.4s }, [x8], x15 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v5.1d, v6.1d }, [x9], x14 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v14.2d, v15.2d }, [x24], x1 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b | st1 { v15.8b, v16.8b, v17.8b }, [x0] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b | st1 { v27.16b, v28.16b, v29.16b }, [x18] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h | st1 { v13.4h, v14.4h, v15.4h }, [x7] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h | st1 { v8.8h, v9.8h, v10.8h }, [x16] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s | st1 { v12.2s, v13.2s, v14.2s }, [x3] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s | st1 { v19.4s, v20.4s, v21.4s }, [x7] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d | st1 { v5.1d, v6.1d, v7.1d }, [x3] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d | st1 { v13.2d, v14.2d, v15.2d }, [x27] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v3.8b, v4.8b, v5.8b }, [x21], #24 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v25.16b, v26.16b, v27.16b }, [x4], #48 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v24.4h, v25.4h, v26.4h }, [x9], #24 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v0.8h, v1.8h, v2.8h }, [x7], #48 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v3.2s, v4.2s, v5.2s }, [x4], #24 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v25.4s, v26.4s, v27.4s }, [x14], #48 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v7.1d, v8.1d, v9.1d }, [x13], #24 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v19.2d, v20.2d, v21.2d }, [x5], #48 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v5.8b, v6.8b, v7.8b }, [x17], x25 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v12.16b, v13.16b, v14.16b }, [x29], x23 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v18.4h, v19.4h, v20.4h }, [x0], x14 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v16.8h, v17.8h, v18.8h }, [x1], x18 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v1.2s, v2.2s, v3.2s }, [x15], x29 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v2.4s, v3.4s, v4.4s }, [x29], x6 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v8.1d, v9.1d, v10.1d }, [x13], x27 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v8.2d, v9.2d, v10.2d }, [x18], x19 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x14] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b | st1 { v18.16b, v19.16b, v20.16b, v21.16b }, [x29] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h | st1 { v23.4h, v24.4h, v25.4h, v26.4h }, [x24] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h | st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x19] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s | st1 { v6.2s, v7.2s, v8.2s, v9.2s }, [x13] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s | st1 { v26.4s, v27.4s, v28.4s, v29.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
-# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d | st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x10] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d | st1 { v25.2d, v26.2d, v27.2d, v28.2d }, [x19] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v27.8b, v28.8b, v29.8b, v30.8b }, [x17], #32 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x0], #64 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v18.4h, v19.4h, v20.4h, v21.4h }, [x22], #32 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v12.8h, v13.8h, v14.8h, v15.8h }, [x13], #64 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v13.2s, v14.2s, v15.2s, v16.2s }, [x25], #32 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x11], #64 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x13], #32 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v12.2d, v13.2d, v14.2d, v15.2d }, [x25], #64 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x25], x28 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x24], x5 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x25], x19 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v20.8h, v21.8h, v22.8h, v23.8h }, [x18], x0 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v4.2s, v5.2s, v6.2s, v7.2s }, [x9], x5 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x12], x30 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v23.1d, v24.1d, v25.1d, v26.1d }, [x23], x4 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
-# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v20.2d, v21.2d, v22.2d, v23.2d }, [x7], x14 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8 | st1 { v1.b }[5], [x1] // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16 | st1 { v0.h }[2], [x1] // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32 | st1 { v31.s }[1], [x16] // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64 | st1 { v15.d }[1], [x8] // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, D \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v15.b }[1], [x12], #1 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v16.b }[3], [x0], x2 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v29.h }[2], [x27], #2 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v15.h }[4], [x30], x9 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v3.s }[1], [x24], #4 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v26.s }[0], [x2], x30 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v19.d }[1], [x9], #8 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v29.d }[0], [x26], x22 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_H_IMM | st1b { z7.h }, p2, [x14] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_S_IMM | st1b { z16.s }, p4, [x20, #3, mul vl] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1B_S | st1b { z17.s }, p3, [x20, x0] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D_UXTW | st1b { z0.d }, p4, [x11, z13.d, uxtw] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1B_S_SXTW | st1b { z16.s }, p4, [x19, z25.s, sxtw] // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D | st1b { z10.d }, p3, [x12, z21.d] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1B_S_IMM | st1b { z17.s }, p7, [z28.s] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1B_S_IMM | st1b { z16.s }, p0, [z25.s, #7] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D_IMM | st1b { z15.d }, p6, [z27.d] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1B_D_IMM | st1b { z2.d }, p0, [z21.d, #24] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_SXTW_SCALED | st1d { z10.d }, p2, [x26, z5.d, sxtw #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_UXTW | st1d { z18.d }, p2, [x7, z1.d, uxtw] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_SCALED | st1d { z9.d }, p6, [x6, z12.d, lsl #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D | st1d { z3.d }, p3, [x1, z30.d] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_IMM | st1d { z18.d }, p0, [z7.d] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1D_IMM | st1d { z4.d }, p2, [z2.d, #136] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1H_S_IMM | st1h { z28.s }, p3, [x18] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | ST1H_IMM | st1h { z23.h }, p1, [x14, #-8, mul vl] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | ST1H_S | st1h { z25.s }, p3, [x17, x8, lsl #1] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_SXTW_SCALED | st1h { z12.s }, p3, [x24, z30.s, sxtw #1] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_UXTW_SCALED | st1h { z26.d }, p5, [x9, z17.d, uxtw #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_SXTW | st1h { z23.d }, p1, [x5, z25.d, sxtw] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_SXTW | st1h { z14.s }, p4, [x22, z17.s, sxtw] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_SCALED | st1h { z23.d }, p3, [x25, z11.d, lsl #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D | st1h { z0.d }, p4, [x21, z21.d] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_IMM | st1h { z29.s }, p5, [z9.s] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1H_S_IMM | st1h { z4.s }, p7, [z23.s, #40] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_IMM | st1h { z27.d }, p2, [z3.d] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1H_D_IMM | st1h { z11.d }, p6, [z7.d, #38] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_SXTW_SCALED | st1w { z25.s }, p1, [x9, z28.s, sxtw #2] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_SXTW_SCALED | st1w { z13.d }, p3, [x16, z9.d, sxtw #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_SXTW | st1w { z21.d }, p1, [x24, z23.d, sxtw] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_UXTW | st1w { z17.s }, p1, [x5, z22.s, uxtw] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_SCALED | st1w { z28.d }, p1, [x5, z8.d, lsl #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D | st1w { z26.d }, p3, [x3, z0.d] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_IMM | st1w { z28.s }, p6, [z21.s] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8] | SST1W_IMM | st1w { z26.s }, p3, [z24.s, #120] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_IMM | st1w { z3.d }, p0, [z12.d] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitV[4] | SST1W_D_IMM | st1w { z17.d }, p2, [z1.d, #80] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b | st2 { v14.8b, v15.8b }, [x2] // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b | st2 { v6.16b, v7.16b }, [x23] // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h | st2 { v10.4h, v11.4h }, [x18] // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h | st2 { v10.8h, v11.8h }, [x18] // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s | st2 { v25.2s, v26.2s }, [x29] // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s | st2 { v26.4s, v27.4s }, [x14] // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d | st2 { v10.2d, v11.2d }, [x1] // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v21.8b, v22.8b }, [x22], #16 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v26.16b, v27.16b }, [x2], #32 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v19.4h, v20.4h }, [x27], #16 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v28.8h, v29.8h }, [x22], #32 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v1.2s, v2.2s }, [x26], #16 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v19.4s, v20.4s }, [x7], #32 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v22.2d, v23.2d }, [x18], #32 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v29.8b, v30.8b }, [x9], x2 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v17.16b, v18.16b }, [x4], x0 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v9.4h, v10.4h }, [x7], x25 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v8.8h, v9.8h }, [x11], x8 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v17.2s, v18.2s }, [x2], x8 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v9.4s, v10.4s }, [x23], x12 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v29.2d, v30.2d }, [x25], x11 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8 | st2 { v21.b, v22.b }[15], [x15] // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16 | st2 { v28.h, v29.h }[2], [x6] // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32 | st2 { v14.s, v15.s }[1], [x25] // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64 | st2 { v17.d, v18.d }[1], [x1] // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, D \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v9.b, v10.b }[15], [x12], #2 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v19.b, v20.b }[9], [x27], x28 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v18.h, v19.h }[3], [x30], #4 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v13.h, v14.h }[5], [x23], x24 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v23.s, v24.s }[1], [x22], #8 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v16.s, v17.s }[3], [x12], x16 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v27.d, v28.d }[0], [x16], #16 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v6.d, v7.d }[1], [x14], x5 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2B_IMM | st2b { z19.b, z20.b }, p1, [x18] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2B_IMM | st2b { z26.b, z27.b }, p7, [x15, #-6, mul vl] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2B | st2b { z19.b, z20.b }, p1, [x23, x27] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2D_IMM | st2d { z29.d, z30.d }, p4, [x8] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2D_IMM | st2d { z16.d, z17.d }, p3, [x20, #14, mul vl] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2D | st2d { z17.d, z18.d }, p7, [x2, x28, lsl #3] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2H_IMM | st2h { z5.h, z6.h }, p7, [x23] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2H_IMM | st2h { z11.h, z12.h }, p6, [x4, #10, mul vl] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI[2], V1UnitL[2], V1UnitL01[2], V1UnitS[2], V1UnitV[2] | ST2H | st2h { z3.h, z4.h }, p3, [x22, x16, lsl #1] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 3 4 4 1.0 V1UnitL01[2],V1UnitS[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2W_IMM | st2w { z14.s, z15.s }, p4, [x17] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2W_IMM | st2w { z9.s, z10.s }, p5, [x19, #-8, mul vl] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2] | ST2W | st2w { z5.s, z6.s }, p3, [x23, x13, lsl #2] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b | st3 { v10.8b, v11.8b, v12.8b }, [x18] // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b | st3 { v26.16b, v27.16b, v28.16b }, [x4] // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h | st3 { v25.4h, v26.4h, v27.4h }, [x11] // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h | st3 { v0.8h, v1.8h, v2.8h }, [x0] // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s | st3 { v19.2s, v20.2s, v21.2s }, [x30] // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s | st3 { v24.4s, v25.4s, v26.4s }, [x8] // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
-# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d | st3 { v24.2d, v25.2d, v26.2d }, [x25] // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, D \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v25.8b, v26.8b, v27.8b }, [x23], #24 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x26], #48 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v24.4h, v25.4h, v26.4h }, [x3], #24 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v23.8h, v24.8h, v25.8h }, [x22], #48 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v7.2s, v8.2s, v9.2s }, [x8], #24 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v11.4s, v12.4s, v13.4s }, [x15], #48 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v1.2d, v2.2d, v3.2d }, [x4], #48 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v16.8b, v17.8b, v18.8b }, [x26], x2 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x3], x18 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v2.4h, v3.4h, v4.4h }, [x4], x4 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v27.8h, v28.8h, v29.8h }, [x27], x8 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v26.2s, v27.2s, v28.2s }, [x2], x25 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v5.4s, v6.4s, v7.4s }, [x18], x29 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v26.2d, v27.2d, v28.2d }, [x14], x5 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8 | st3 { v8.b, v9.b, v10.b }[4], [x18] // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16 | st3 { v11.h, v12.h, v13.h }[4], [x0] // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32 | st3 { v9.s, v10.s, v11.s }[2], [x20] // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64 | st3 { v16.d, v17.d, v18.d }[0], [x13] // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v26.b, v27.b, v28.b }[1], [x12], #3 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v27.b, v28.b, v29.b }[15], [x19], x23 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v24.h, v25.h, v26.h }[2], [x14], #6 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v1.h, v2.h, v3.h }[2], [x0], x23 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v25.s, v26.s, v27.s }[2], [x10], #12 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v8.s, v9.s, v10.s }[0], [x11], x20 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v19.d, v20.d, v21.d }[1], [x5], #24 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v10.d, v11.d, v12.d }[0], [x12], x11 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3B_IMM | st3b { z0.b - z2.b }, p6, [x26] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3B_IMM | st3b { z22.b - z24.b }, p6, [x25, #3, mul vl] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3B | st3b { z14.b - z16.b }, p2, [x29, x27] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3D_IMM | st3d { z6.d - z8.d }, p2, [x12] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3D_IMM | st3d { z20.d - z22.d }, p5, [x15, #9, mul vl] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3D | st3d { z15.d - z17.d }, p7, [x0, x9, lsl #3] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3H_IMM | st3h { z17.h - z19.h }, p3, [x14] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3H_IMM | st3h { z21.h - z23.h }, p0, [x15, #6, mul vl] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3H | st3h { z2.h - z4.h }, p3, [x21, x9, lsl #1] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3W_IMM | st3w { z9.s - z11.s }, p3, [x29] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitV[9] | ST3W_IMM | st3w { z11.s - z13.s }, p4, [x13, #15, mul vl] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
-# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitV[9] | ST3W | st3w { z19.s - z21.s }, p2, [x22, x28, lsl #2] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
-# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b | st4 { v17.8b, v18.8b, v19.8b, v20.8b }, [x8] // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b | st4 { v7.16b, v8.16b, v9.16b, v10.16b }, [x15] // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
-# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h | st4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x13] // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h | st4 { v11.8h, v12.8h, v13.8h, v14.8h }, [x1] // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
-# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s | st4 { v15.2s, v16.2s, v17.2s, v18.2s }, [x18] // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
-# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x6] // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
-# CHECK-NEXT: 2 | 4 | 4 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d | st4 { v25.2d, v26.2d, v27.2d, v28.2d }, [x16] // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, D \\ 2 4 4 0.25 V1UnitV01[8],V1UnitL01[8]
-# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b_POST | st4 { v16.8b, v17.8b, v18.8b, v19.8b }, [x24], #32 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
-# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x13], #64 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h_POST | st4 { v17.4h, v18.4h, v19.4h, v20.4h }, [x3], #32 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
-# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h_POST | st4 { v18.8h, v19.8h, v20.8h, v21.8h }, [x5], #64 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s_POST | st4 { v26.2s, v27.2s, v28.2s, v29.2s }, [x17], #32 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
-# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s_POST | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x7], #64 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 0.25 | V1UnitI, V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d_POST | st4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x25], #64 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b_POST | st4 { v24.8b, v25.8b, v26.8b, v27.8b }, [x24], x8 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
-# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x21], x21 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h_POST | st4 { v11.4h, v12.4h, v13.4h, v14.4h }, [x29], x3 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
-# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h_POST | st4 { v16.8h, v17.8h, v18.8h, v19.8h }, [x13], x3 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s_POST | st4 { v13.2s, v14.2s, v15.2s, v16.2s }, [x0], x0 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
-# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s_POST | st4 { v26.4s, v27.4s, v28.4s, v29.4s }, [x1], x22 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 0.25 | V1UnitI, V1UnitL[8], V1UnitL01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d_POST | st4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x10], x28 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
-# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i8 | st4 { v10.b, v11.b, v12.b, v13.b }[3], [x5] // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i16 | st4 { v5.h, v6.h, v7.h, v8.h }[4], [x13] // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i32 | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x7] // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, S \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64 | st4 { v23.d, v24.d, v25.d, v26.d }[1], [x5] // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
-# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i8_POST | st4 { v22.b, v23.b, v24.b, v25.b }[0], [x29], #4 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i8_POST | st4 { v6.b, v7.b, v8.b, v9.b }[9], [x26], x21 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i16_POST | st4 { v19.h, v20.h, v21.h, v22.h }[2], [x18], #8 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i16_POST | st4 { v6.h, v7.h, v8.h, v9.h }[4], [x9], x9 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i32_POST | st4 { v19.s, v20.s, v21.s, v22.s }[2], [x27], #16 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i32_POST | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x29], x21 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x16], #32 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x12], x11 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4B_IMM | st4b { z22.b - z25.b }, p0, [x0] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4B_IMM | st4b { z1.b - z4.b }, p7, [x1, #20, mul vl] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4B | st4b { z28.b - z31.b }, p4, [x27, x20] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4D_IMM | st4d { z19.d - z22.d }, p1, [x11] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4D_IMM | st4d { z0.d - z3.d }, p6, [x7, #-24, mul vl] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4D | st4d { z28.d - z31.d }, p5, [x19, x20, lsl #3] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4H_IMM | st4h { z14.h - z17.h }, p1, [x24] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4H_IMM | st4h { z27.h - z30.h }, p3, [x26, #16, mul vl] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4H | st4h { z2.h - z5.h }, p5, [x30, x17, lsl #1] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4W_IMM | st4w { z3.s - z6.s }, p0, [x0] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitV[18] | ST4W_IMM | st4w { z5.s - z8.s }, p2, [x0, #-20, mul vl] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
-# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitV[18] | ST4W | st4w { z21.s - z24.s }, p5, [x5, x18, lsl #2] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLv1i64 | sshl d29, d30, d9 // SSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLv2i64 | sshl v13.2d, v7.2d, v27.2d // SSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v9.8h, v2.8b, #0 // SSHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv4i16_shift | sshll v12.4s, v3.4h, #4 // SSHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv2i32_shift | sshll v17.2d, v6.2s, #22 // SSHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv16i8_shift | sshll2 v28.8h, v12.16b, #7 // SSHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv8i16_shift | sshll2 v29.4s, v22.8h, #7 // SSHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v17.2d, v13.4s, #22 // SSHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHRd | sshr d3, d18, #10 // SSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHRv8i8_shift | sshr v20.8b, v28.8b, #2 // SSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHRv4i16_shift | sshr v20.4h, v23.4h, #10 // SSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHRv2i32_shift | sshr v13.2s, v23.2s, #2 // SSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHRv2i64_shift | sshr v3.2d, v8.2d, #61 // SSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSRAd | ssra d28, d30, #51 // SSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSRAv8i8_shift | ssra v9.8b, v18.8b, #2 // SSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSRAv4i16_shift | ssra v21.4h, v24.4h, #3 // SSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSRAv2i32_shift | ssra v28.2s, v17.2s, #6 // SSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSRAv2i64_shift | ssra v0.2d, v23.2d, #35 // SSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SSUBLv4i16_v4i32 | ssubl v13.4s, v9.4h, v5.4h // SSUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SSUBLv8i16_v4i32 | ssubl2 v18.4s, v29.8h, v17.8h // SSUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SSUBWv2i32_v2i64 | ssubw v5.2d, v13.2d, v4.2s // SSUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SSUBWv8i16_v4i32 | ssubw2 v4.4s, v26.4s, v31.8h // SSUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev8b | st1 { v18.8b }, [x15] // ST1 { <Vt>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev16b | st1 { v31.16b }, [x29] // ST1 { <Vt>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev4h | st1 { v19.4h }, [x7] // ST1 { <Vt>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev8h | st1 { v27.8h }, [x17] // ST1 { <Vt>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev2s | st1 { v25.2s }, [x6] // ST1 { <Vt>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev4s | st1 { v22.4s }, [x19] // ST1 { <Vt>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev1d | st1 { v20.1d }, [x10] // ST1 { <Vt>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev2d | st1 { v8.2d }, [x15] // ST1 { <Vt>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v16.8b }, [x14], #8 // ST1 { <Vt>.8B }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v10.16b }, [x8], #16 // ST1 { <Vt>.16B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v29.4h }, [x17], #8 // ST1 { <Vt>.4H }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v14.8h }, [x28], #16 // ST1 { <Vt>.8H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v18.2s }, [x20], #8 // ST1 { <Vt>.2S }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v28.4s }, [x1], #16 // ST1 { <Vt>.4S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v17.1d }, [x27], #8 // ST1 { <Vt>.1D }, [<Xn|SP>], #8 \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v30.2d }, [x4], #16 // ST1 { <Vt>.2D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev8b_POST | st1 { v13.8b }, [x8], x7 // ST1 { <Vt>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev16b_POST | st1 { v4.16b }, [x7], x26 // ST1 { <Vt>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev4h_POST | st1 { v17.4h }, [x10], x4 // ST1 { <Vt>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev8h_POST | st1 { v18.8h }, [x15], x1 // ST1 { <Vt>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev2s_POST | st1 { v6.2s }, [x17], x24 // ST1 { <Vt>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev4s_POST | st1 { v26.4s }, [x20], x29 // ST1 { <Vt>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev1d_POST | st1 { v13.1d }, [x3], x20 // ST1 { <Vt>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Onev2d_POST | st1 { v15.2d }, [x21], x11 // ST1 { <Vt>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 1 reg, Q-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov8b | st1 { v8.8b, v9.8b }, [x18] // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b | st1 { v1.16b, v2.16b }, [x4] // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov4h | st1 { v22.4h, v23.4h }, [x22] // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h | st1 { v18.8h, v19.8h }, [x2] // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov2s | st1 { v13.2s, v14.2s }, [x9] // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s | st1 { v15.4s, v16.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov1d | st1 { v21.1d, v22.1d }, [x29] // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d | st1 { v26.2d, v27.2d }, [x28] // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v23.8b, v24.8b }, [x4], #16 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v15.16b, v16.16b }, [x16], #32 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v7.4h, v8.4h }, [x7], #16 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v8.8h, v9.8h }, [x1], #32 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v23.2s, v24.2s }, [x7], #16 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v8.4s, v9.4s }, [x15], #32 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v14.1d, v15.1d }, [x11], #16 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], #16 \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v12.2d, v13.2d }, [x2], #32 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov8b_POST | st1 { v3.8b, v4.8b }, [x28], x14 // ST1 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov16b_POST | st1 { v19.16b, v20.16b }, [x13], x7 // ST1 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov4h_POST | st1 { v28.4h, v29.4h }, [x14], x5 // ST1 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov8h_POST | st1 { v9.8h, v10.8h }, [x28], x9 // ST1 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov2s_POST | st1 { v10.2s, v11.2s }, [x10], x2 // ST1 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov4s_POST | st1 { v13.4s, v14.4s }, [x8], x15 // ST1 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1Twov1d_POST | st1 { v5.1d, v6.1d }, [x9], x14 // ST1 { <Vt>.1D, <Vt2>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, D-form \\ 3 2 2 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Twov2d_POST | st1 { v14.2d, v15.2d }, [x24], x1 // ST1 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 2 reg, Q-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b | st1 { v15.8b, v16.8b, v17.8b }, [x0] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b | st1 { v27.16b, v28.16b, v29.16b }, [x18] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h | st1 { v13.4h, v14.4h, v15.4h }, [x7] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h | st1 { v8.8h, v9.8h, v10.8h }, [x16] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s | st1 { v12.2s, v13.2s, v14.2s }, [x3] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s | st1 { v19.4s, v20.4s, v21.4s }, [x7] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d | st1 { v5.1d, v6.1d, v7.1d }, [x3] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d | st1 { v13.2d, v14.2d, v15.2d }, [x27] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 2 2 2 0.67 V1UnitL01[3],V1UnitV01[3]
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v3.8b, v4.8b, v5.8b }, [x21], #24 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v25.16b, v26.16b, v27.16b }, [x4], #48 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v24.4h, v25.4h, v26.4h }, [x9], #24 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v0.8h, v1.8h, v2.8h }, [x7], #48 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v3.2s, v4.2s, v5.2s }, [x4], #24 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v25.4s, v26.4s, v27.4s }, [x14], #48 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v7.1d, v8.1d, v9.1d }, [x13], #24 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], #24 \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v19.2d, v20.2d, v21.2d }, [x5], #48 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev8b_POST | st1 { v5.8b, v6.8b, v7.8b }, [x17], x25 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev16b_POST | st1 { v12.16b, v13.16b, v14.16b }, [x29], x23 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev4h_POST | st1 { v18.4h, v19.4h, v20.4h }, [x0], x14 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev8h_POST | st1 { v16.8h, v17.8h, v18.8h }, [x1], x18 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev2s_POST | st1 { v1.2s, v2.2s, v3.2s }, [x15], x29 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev4s_POST | st1 { v2.4s, v3.4s, v4.4s }, [x29], x6 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Threev1d_POST | st1 { v8.1d, v9.1d, v10.1d }, [x13], x27 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST1Threev2d_POST | st1 { v8.2d, v9.2d, v10.2d }, [x18], x19 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 3 reg, Q-form \\ 3 2 2 0.67 V1UnitL01[3],V1UnitV01[3],V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x14] // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b | st1 { v18.16b, v19.16b, v20.16b, v21.16b }, [x29] // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h | st1 { v23.4h, v24.4h, v25.4h, v26.4h }, [x24] // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h | st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x19] // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s | st1 { v6.2s, v7.2s, v8.2s, v9.2s }, [x13] // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s | st1 { v26.4s, v27.4s, v28.4s, v29.4s }, [x12] // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d | st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x10] // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 2 2 2 1.0 V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d | st1 { v25.2d, v26.2d, v27.2d, v28.2d }, [x19] // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 2 2 2 0.5 V1UnitL01[4],V1UnitV01[4]
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v27.8b, v28.8b, v29.8b, v30.8b }, [x17], #32 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x0], #64 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v18.4h, v19.4h, v20.4h, v21.4h }, [x22], #32 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v12.8h, v13.8h, v14.8h, v15.8h }, [x13], #64 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v13.2s, v14.2s, v15.2s, v16.2s }, [x25], #32 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x11], #64 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x13], #32 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], #32 \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v12.2d, v13.2d, v14.2d, v15.2d }, [x25], #64 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv8b_POST | st1 { v21.8b, v22.8b, v23.8b, v24.8b }, [x25], x28 // ST1 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv16b_POST | st1 { v26.16b, v27.16b, v28.16b, v29.16b }, [x24], x5 // ST1 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv4h_POST | st1 { v20.4h, v21.4h, v22.4h, v23.4h }, [x25], x19 // ST1 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv8h_POST | st1 { v20.8h, v21.8h, v22.8h, v23.8h }, [x18], x0 // ST1 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv2s_POST | st1 { v4.2s, v5.2s, v6.2s, v7.2s }, [x9], x5 // ST1 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv4s_POST | st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x12], x30 // ST1 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST1Fourv1d_POST | st1 { v23.1d, v24.1d, v25.1d, v26.1d }, [x23], x4 // ST1 { <Vt>.1D, <Vt2>.1D, <Vt3>.1D, <Vt4>.1D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, D-form \\ 3 2 2 1.0 V1UnitL01[2],V1UnitV01[2],V1UnitI
+# CHECK-NEXT: 3 | 2 | 2 | 0.50 | V1UnitI, V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV01[4] | ST1Fourv2d_POST | st1 { v20.2d, v21.2d, v22.2d, v23.2d }, [x7], x14 // ST1 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, multiple, 4 reg, Q-form \\ 3 2 2 0.5 V1UnitL01[4],V1UnitV01[4],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i8 | st1 { v1.b }[5], [x1] // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i16 | st1 { v0.h }[2], [x1] // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i32 | st1 { v31.s }[1], [x16] // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i64 | st1 { v15.d }[1], [x8] // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 1 element, one lane, D \\ 2 4 4 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v15.b }[1], [x12], #1 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], #1 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i8_POST | st1 { v16.b }[3], [x0], x2 // ST1 { <Vt>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v29.h }[2], [x27], #2 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], #2 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i16_POST | st1 { v15.h }[4], [x30], x9 // ST1 { <Vt>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v3.s }[1], [x24], #4 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], #4 \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i32_POST | st1 { v26.s }[0], [x2], x30 // ST1 { <Vt>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v19.d }[1], [x9], #8 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], #8 \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST1i64_POST | st1 { v29.d }[0], [x26], x22 // ST1 { <Vt>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 1 element, one lane, D \\ 3 4 4 2.0 V1UnitL01,V1UnitV01,V1UnitI
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | ST1B_H_IMM | st1b { z7.h }, p2, [x14] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | ST1B_S_IMM | st1b { z16.s }, p4, [x20, #3, mul vl] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | ST1B_S | st1b { z17.s }, p3, [x20, x0] // ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1B_D_UXTW | st1b { z0.d }, p4, [x11, z13.d, uxtw] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1B_S_SXTW | st1b { z16.s }, p4, [x19, z25.s, sxtw] // ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1B_D | st1b { z10.d }, p3, [x12, z21.d] // ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1B_S_IMM | st1b { z17.s }, p7, [z28.s] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1B_S_IMM | st1b { z16.s }, p0, [z25.s, #7] // ST1B { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1B_D_IMM | st1b { z15.d }, p6, [z27.d] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1B_D_IMM | st1b { z2.d }, p0, [z21.d, #24] // ST1B { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1D_SXTW_SCALED | st1d { z10.d }, p2, [x26, z5.d, sxtw #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1D_UXTW | st1d { z18.d }, p2, [x7, z1.d, uxtw] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1D_SCALED | st1d { z9.d }, p6, [x6, z12.d, lsl #3] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1D | st1d { z3.d }, p3, [x1, z30.d] // ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1D_IMM | st1d { z18.d }, p0, [z7.d] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1D_IMM | st1d { z4.d }, p2, [z2.d, #136] // ST1D { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | ST1H_S_IMM | st1h { z28.s }, p3, [x18] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | ST1H_IMM | st1h { z23.h }, p1, [x14, #-8, mul vl] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitSVE01, V1UnitV | ST1H_S | st1h { z25.s }, p3, [x17, x8, lsl #1] // ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1H_S_SXTW_SCALED | st1h { z12.s }, p3, [x24, z30.s, sxtw #1] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1H_D_UXTW_SCALED | st1h { z26.d }, p5, [x9, z17.d, uxtw #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1H_D_SXTW | st1h { z23.d }, p1, [x5, z25.d, sxtw] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1H_S_SXTW | st1h { z14.s }, p4, [x22, z17.s, sxtw] // ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1H_D_SCALED | st1h { z23.d }, p3, [x25, z11.d, lsl #1] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1H_D | st1h { z0.d }, p4, [x21, z21.d] // ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1H_S_IMM | st1h { z29.s }, p5, [z9.s] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1H_S_IMM | st1h { z4.s }, p7, [z23.s, #40] // ST1H { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1H_D_IMM | st1h { z27.d }, p2, [z3.d] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1H_D_IMM | st1h { z11.d }, p6, [z7.d, #38] // ST1H { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1W_SXTW_SCALED | st1w { z25.s }, p1, [x9, z28.s, sxtw #2] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2] \\ Scatter store, 32-bit scaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1W_D_SXTW_SCALED | st1w { z13.d }, p3, [x16, z9.d, sxtw #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2] \\ Scatter store, 32-bit unpacked scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1W_D_SXTW | st1w { z21.d }, p1, [x24, z23.d, sxtw] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] \\ Scatter store, 32-bit unpacked unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1W_UXTW | st1w { z17.s }, p1, [x5, z22.s, uxtw] // ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] \\ Scatter store, 32-bit unscaled offset \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1W_D_SCALED | st1w { z28.d }, p1, [x5, z8.d, lsl #2] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2] \\ Scatter store, 64-bit scaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1W_D | st1w { z26.d }, p3, [x3, z0.d] // ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] \\ Scatter store, 64-bit unscaled offset \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1W_IMM | st1w { z28.s }, p6, [z21.s] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 10 | 10 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8] | SST1W_IMM | st1w { z26.s }, p3, [z24.s, #120] // ST1W { <Zt>.S }, <Pg>, [<Zn>.S, #<imm>] \\ Scatter store vector + imm 32-bit element size \\ 2 10 10 0.25 V1UnitL01[8],V1UnitV[8]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1W_D_IMM | st1w { z3.d }, p0, [z12.d] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 6 | 6 | 0.50 | V1UnitL[4], V1UnitL01[4], V1UnitSVE01[4], V1UnitV[4] | SST1W_D_IMM | st1w { z17.d }, p2, [z1.d, #80] // ST1W { <Zt>.D }, <Pg>, [<Zn>.D, #<imm>] \\ Scatter store vector + imm 64-bit element size \\ 2 6 6 0.5 V1UnitL01[4],V1UnitV[4]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov8b | st2 { v14.8b, v15.8b }, [x2] // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b | st2 { v6.16b, v7.16b }, [x23] // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov4h | st2 { v10.4h, v11.4h }, [x18] // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h | st2 { v10.8h, v11.8h }, [x18] // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov2s | st2 { v25.2s, v26.2s }, [x29] // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s | st2 { v26.4s, v27.4s }, [x14] // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d | st2 { v10.2d, v11.2d }, [x1] // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>] \\ ASIMD store, 2 element, multiple, Q-form, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v21.8b, v22.8b }, [x22], #16 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v26.16b, v27.16b }, [x2], #32 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v19.4h, v20.4h }, [x27], #16 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v28.8h, v29.8h }, [x22], #32 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v1.2s, v2.2s }, [x26], #16 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], #16 \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v19.4s, v20.4s }, [x7], #32 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v22.2d, v23.2d }, [x18], #32 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], #32 \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov8b_POST | st2 { v29.8b, v30.8b }, [x9], x2 // ST2 { <Vt>.8B, <Vt2>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov16b_POST | st2 { v17.16b, v18.16b }, [x4], x0 // ST2 { <Vt>.16B, <Vt2>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov4h_POST | st2 { v9.4h, v10.4h }, [x7], x25 // ST2 { <Vt>.4H, <Vt2>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov8h_POST | st2 { v8.8h, v9.8h }, [x11], x8 // ST2 { <Vt>.8H, <Vt2>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2Twov2s_POST | st2 { v17.2s, v18.2s }, [x2], x8 // ST2 { <Vt>.2S, <Vt2>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, D-form, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov4s_POST | st2 { v9.4s, v10.4s }, [x23], x12 // ST2 { <Vt>.4S, <Vt2>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST2Twov2d_POST | st2 { v29.2d, v30.2d }, [x25], x11 // ST2 { <Vt>.2D, <Vt2>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, multiple, Q-form, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i8 | st2 { v21.b, v22.b }[15], [x15] // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i16 | st2 { v28.h, v29.h }[2], [x6] // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i32 | st2 { v14.s, v15.s }[1], [x25] // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, B/H/S \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 2 | 4 | 4 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i64 | st2 { v17.d, v18.d }[1], [x1] // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 2 element, one lane, D \\ 2 4 4 2.0 V1UnitV01,V1UnitL01
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v9.b, v10.b }[15], [x12], #2 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], #2 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i8_POST | st2 { v19.b, v20.b }[9], [x27], x28 // ST2 { <Vt>.B, <Vt2>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v18.h, v19.h }[3], [x30], #4 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], #4 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i16_POST | st2 { v13.h, v14.h }[5], [x23], x24 // ST2 { <Vt>.H, <Vt2>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v23.s, v24.s }[1], [x22], #8 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], #8 \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i32_POST | st2 { v16.s, v17.s }[3], [x12], x16 // ST2 { <Vt>.S, <Vt2>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, B/H/S \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v27.d, v28.d }[0], [x16], #16 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], #16 \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | ST2i64_POST | st2 { v6.d, v7.d }[1], [x14], x5 // ST2 { <Vt>.D, <Vt2>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 2 element, one lane, D \\ 3 4 4 2.0 V1UnitV01,V1UnitL01,V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2B_IMM | st2b { z19.b, z20.b }, p1, [x18] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2B_IMM | st2b { z26.b, z27.b }, p7, [x15, #-6, mul vl] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2B | st2b { z19.b, z20.b }, p1, [x23, x27] // ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2D_IMM | st2d { z29.d, z30.d }, p4, [x8] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2D_IMM | st2d { z16.d, z17.d }, p3, [x20, #14, mul vl] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2D | st2d { z17.d, z18.d }, p7, [x2, x28, lsl #3] // ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2H_IMM | st2h { z5.h, z6.h }, p7, [x23] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2H_IMM | st2h { z11.h, z12.h }, p6, [x4, #10, mul vl] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI[2], V1UnitL[2], V1UnitL01[2], V1UnitS[2], V1UnitSVE01[2], V1UnitV[2] | ST2H | st2h { z3.h, z4.h }, p3, [x22, x16, lsl #1] // ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 3 4 4 1.0 V1UnitL01[2],V1UnitS[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2W_IMM | st2w { z14.s, z15.s }, p4, [x17] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2W_IMM | st2w { z9.s, z10.s }, p5, [x19, #-8, mul vl] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store two structures from two vectors, scalar + imm \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2] | ST2W | st2w { z5.s, z6.s }, p3, [x23, x13, lsl #2] // ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store two structures from two vectors, scalar + scalar \\ 2 4 4 1.0 V1UnitL01[2],V1UnitV[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b | st3 { v10.8b, v11.8b, v12.8b }, [x18] // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b | st3 { v26.16b, v27.16b, v28.16b }, [x4] // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h | st3 { v25.4h, v26.4h, v27.4h }, [x11] // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h | st3 { v0.8h, v1.8h, v2.8h }, [x0] // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s | st3 { v19.2s, v20.2s, v21.2s }, [x30] // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s | st3 { v24.4s, v25.4s, v26.4s }, [x8] // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 2 | 5 | 5 | 0.67 | V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d | st3 { v24.2d, v25.2d, v26.2d }, [x25] // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>] \\ ASIMD store, 3 element, multiple, Q-form, D \\ 2 5 5 0.67 V1UnitV01[3],V1UnitL01[3]
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v25.8b, v26.8b, v27.8b }, [x23], #24 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x26], #48 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v24.4h, v25.4h, v26.4h }, [x3], #24 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v23.8h, v24.8h, v25.8h }, [x22], #48 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v7.2s, v8.2s, v9.2s }, [x8], #24 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], #24 \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v11.4s, v12.4s, v13.4s }, [x15], #48 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v1.2d, v2.2d, v3.2d }, [x4], #48 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], #48 \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev8b_POST | st3 { v16.8b, v17.8b, v18.8b }, [x26], x2 // ST3 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev16b_POST | st3 { v9.16b, v10.16b, v11.16b }, [x3], x18 // ST3 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev4h_POST | st3 { v2.4h, v3.4h, v4.4h }, [x4], x4 // ST3 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev8h_POST | st3 { v27.8h, v28.8h, v29.8h }, [x27], x8 // ST3 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3Threev2s_POST | st3 { v26.2s, v27.2s, v28.2s }, [x2], x25 // ST3 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, D-form, B/H/S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev4s_POST | st3 { v5.4s, v6.4s, v7.4s }, [x18], x29 // ST3 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, B/H/S \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 3 | 5 | 5 | 0.67 | V1UnitI, V1UnitL[3], V1UnitL01[3], V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | ST3Threev2d_POST | st3 { v26.2d, v27.2d, v28.2d }, [x14], x5 // ST3 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, multiple, Q-form, D \\ 3 5 5 0.67 V1UnitV01[3],V1UnitL01[3],V1UnitI
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i8 | st3 { v8.b, v9.b, v10.b }[4], [x18] // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i16 | st3 { v11.h, v12.h, v13.h }[4], [x0] // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, B/H \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i32 | st3 { v9.s, v10.s, v11.s }[2], [x20] // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, S \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i64 | st3 { v16.d, v17.d, v18.d }[0], [x13] // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 3 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v26.b, v27.b, v28.b }[1], [x12], #3 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], #3 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i8_POST | st3 { v27.b, v28.b, v29.b }[15], [x19], x23 // ST3 { <Vt>.B, <Vt2>.B, <Vt3>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v24.h, v25.h, v26.h }[2], [x14], #6 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], #6 \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i16_POST | st3 { v1.h, v2.h, v3.h }[2], [x0], x23 // ST3 { <Vt>.H, <Vt2>.H, <Vt3>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, B/H \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v25.s, v26.s, v27.s }[2], [x10], #12 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], #12 \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i32_POST | st3 { v8.s, v9.s, v10.s }[0], [x11], x20 // ST3 { <Vt>.S, <Vt2>.S, <Vt3>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, S \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v19.d, v20.d, v21.d }[1], [x5], #24 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], #24 \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST3i64_POST | st3 { v10.d, v11.d, v12.d }[0], [x12], x11 // ST3 { <Vt>.D, <Vt2>.D, <Vt3>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 3 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3B_IMM | st3b { z0.b - z2.b }, p6, [x26] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3B_IMM | st3b { z22.b - z24.b }, p6, [x25, #3, mul vl] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitSVE01[9], V1UnitV[9] | ST3B | st3b { z14.b - z16.b }, p2, [x29, x27] // ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3D_IMM | st3d { z6.d - z8.d }, p2, [x12] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3D_IMM | st3d { z20.d - z22.d }, p5, [x15, #9, mul vl] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitSVE01[9], V1UnitV[9] | ST3D | st3d { z15.d - z17.d }, p7, [x0, x9, lsl #3] // ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3H_IMM | st3h { z17.h - z19.h }, p3, [x14] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3H_IMM | st3h { z21.h - z23.h }, p0, [x15, #6, mul vl] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitSVE01[9], V1UnitV[9] | ST3H | st3h { z2.h - z4.h }, p3, [x21, x9, lsl #1] // ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3W_IMM | st3w { z9.s - z11.s }, p3, [x29] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 7 | 7 | 0.22 | V1UnitL[9], V1UnitL01[9], V1UnitSVE01[9], V1UnitV[9] | ST3W_IMM | st3w { z11.s - z13.s }, p4, [x13, #15, mul vl] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store three structures from three vectors, scalar + imm \\ 2 7 7 0.22 V1UnitL01[9],V1UnitV[9]
+# CHECK-NEXT: 3 | 7 | 7 | 0.22 | V1UnitI[9], V1UnitL[9], V1UnitL01[9], V1UnitS[9], V1UnitSVE01[9], V1UnitV[9] | ST3W | st3w { z19.s - z21.s }, p2, [x22, x28, lsl #2] // ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store three structures from three vectors, scalar + scalar \\ 3 7 7 0.22 V1UnitL01[9],V1UnitS[9],V1UnitV[9]
+# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b | st4 { v17.8b, v18.8b, v19.8b, v20.8b }, [x8] // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b | st4 { v7.16b, v8.16b, v9.16b, v10.16b }, [x15] // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h | st4 { v5.4h, v6.4h, v7.4h, v8.4h }, [x13] // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h | st4 { v11.8h, v12.8h, v13.8h, v14.8h }, [x1] // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+# CHECK-NEXT: 2 | 6 | 6 | 0.33 | V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s | st4 { v15.2s, v16.2s, v17.2s, v18.2s }, [x18] // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 2 6 6 0.33 V1UnitV01[6],V1UnitL01[6]
+# CHECK-NEXT: 2 | 7 | 7 | 0.17 | V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x6] // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 2 7 7 0.17 V1UnitV01[12],V1UnitL01[12]
+# CHECK-NEXT: 2 | 4 | 4 | 0.25 | V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d | st4 { v25.2d, v26.2d, v27.2d, v28.2d }, [x16] // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>] \\ ASIMD store, 4 element, multiple, Q-form, D \\ 2 4 4 0.25 V1UnitV01[8],V1UnitL01[8]
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b_POST | st4 { v16.8b, v17.8b, v18.8b, v19.8b }, [x24], #32 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x13], #64 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h_POST | st4 { v17.4h, v18.4h, v19.4h, v20.4h }, [x3], #32 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h_POST | st4 { v18.8h, v19.8h, v20.8h, v21.8h }, [x5], #64 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s_POST | st4 { v26.2s, v27.2s, v28.2s, v29.2s }, [x17], #32 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], #32 \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s_POST | st4 { v21.4s, v22.4s, v23.4s, v24.4s }, [x7], #64 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 0.25 | V1UnitI, V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d_POST | st4 { v27.2d, v28.2d, v29.2d, v30.2d }, [x25], #64 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], #64 \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv8b_POST | st4 { v24.8b, v25.8b, v26.8b, v27.8b }, [x24], x8 // ST4 { <Vt>.8B, <Vt2>.8B, <Vt3>.8B, <Vt4>.8B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv16b_POST | st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x21], x21 // ST4 { <Vt>.16B, <Vt2>.16B, <Vt3>.16B, <Vt4>.16B }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv4h_POST | st4 { v11.4h, v12.4h, v13.4h, v14.4h }, [x29], x3 // ST4 { <Vt>.4H, <Vt2>.4H, <Vt3>.4H, <Vt4>.4H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv8h_POST | st4 { v16.8h, v17.8h, v18.8h, v19.8h }, [x13], x3 // ST4 { <Vt>.8H, <Vt2>.8H, <Vt3>.8H, <Vt4>.8H }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 0.33 | V1UnitI, V1UnitL[6], V1UnitL01[6], V1UnitSVE01[6], V1UnitV[6], V1UnitV01[6] | ST4Fourv2s_POST | st4 { v13.2s, v14.2s, v15.2s, v16.2s }, [x0], x0 // ST4 { <Vt>.2S, <Vt2>.2S, <Vt3>.2S, <Vt4>.2S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, D-form, B/H/S \\ 3 6 6 0.33 V1UnitV01[6],V1UnitL01[6],V1UnitI
+# CHECK-NEXT: 3 | 7 | 7 | 0.17 | V1UnitI, V1UnitL[12], V1UnitL01[12], V1UnitSVE01[12], V1UnitV[12], V1UnitV01[12] | ST4Fourv4s_POST | st4 { v26.4s, v27.4s, v28.4s, v29.4s }, [x1], x22 // ST4 { <Vt>.4S, <Vt2>.4S, <Vt3>.4S, <Vt4>.4S }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, B/H/S \\ 3 7 7 0.17 V1UnitV01[12],V1UnitL01[12],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 0.25 | V1UnitI, V1UnitL[8], V1UnitL01[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV01[8] | ST4Fourv2d_POST | st4 { v18.2d, v19.2d, v20.2d, v21.2d }, [x10], x28 // ST4 { <Vt>.2D, <Vt2>.2D, <Vt3>.2D, <Vt4>.2D }, [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, multiple, Q-form, D \\ 3 4 4 0.25 V1UnitV01[8],V1UnitL01[8],V1UnitI
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i8 | st4 { v10.b, v11.b, v12.b, v13.b }[3], [x5] // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i16 | st4 { v5.h, v6.h, v7.h, v8.h }[4], [x13] // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, B/H \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 6 | 6 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i32 | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x7] // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, S \\ 2 6 6 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i64 | st4 { v23.d, v24.d, v25.d, v26.d }[1], [x5] // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>] \\ ASIMD store, 4 element, one lane, D \\ 2 4 4 1.0 V1UnitV01[2],V1UnitL01[2]
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i8_POST | st4 { v22.b, v23.b, v24.b, v25.b }[0], [x29], #4 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], #4 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i8_POST | st4 { v6.b, v7.b, v8.b, v9.b }[9], [x26], x21 // ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<indexb>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i16_POST | st4 { v19.h, v20.h, v21.h, v22.h }[2], [x18], #8 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], #8 \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i16_POST | st4 { v6.h, v7.h, v8.h, v9.h }[4], [x9], x9 // ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<indexh>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, B/H \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i32_POST | st4 { v19.s, v20.s, v21.s, v22.s }[2], [x27], #16 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], #16 \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 6 | 6 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i32_POST | st4 { v22.s, v23.s, v24.s, v25.s }[0], [x29], x21 // ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<indexs>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, S \\ 3 6 6 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x16], #32 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], #32 \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 3 | 4 | 4 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | ST4i64_POST | st4 { v10.d, v11.d, v12.d, v13.d }[0], [x12], x11 // ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<indexd>], [<Xn|SP>], <Xm> \\ ASIMD store, 4 element, one lane, D \\ 3 4 4 1.0 V1UnitV01[2],V1UnitL01[2],V1UnitI
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4B_IMM | st4b { z22.b - z25.b }, p0, [x0] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4B_IMM | st4b { z1.b - z4.b }, p7, [x1, #20, mul vl] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitSVE01[18], V1UnitV[18] | ST4B | st4b { z28.b - z31.b }, p4, [x27, x20] // ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4D_IMM | st4d { z19.d - z22.d }, p1, [x11] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4D_IMM | st4d { z0.d - z3.d }, p6, [x7, #-24, mul vl] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitSVE01[18], V1UnitV[18] | ST4D | st4d { z28.d - z31.d }, p5, [x19, x20, lsl #3] // ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4H_IMM | st4h { z14.h - z17.h }, p1, [x24] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4H_IMM | st4h { z27.h - z30.h }, p3, [x26, #16, mul vl] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitSVE01[18], V1UnitV[18] | ST4H | st4h { z2.h - z5.h }, p5, [x30, x17, lsl #1] // ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4W_IMM | st4w { z3.s - z6.s }, p0, [x0] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 2 | 11 | 11 | 0.11 | V1UnitL[18], V1UnitL01[18], V1UnitSVE01[18], V1UnitV[18] | ST4W_IMM | st4w { z5.s - z8.s }, p2, [x0, #-20, mul vl] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Contiguous store four structures from four vectors, scalar + imm \\ 2 11 11 0.11 V1UnitL01[18],V1UnitV[18]
+# CHECK-NEXT: 3 | 11 | 11 | 0.11 | V1UnitI[18], V1UnitL[18], V1UnitL01[18], V1UnitS[18], V1UnitSVE01[18], V1UnitV[18] | ST4W | st4w { z21.s - z24.s }, p5, [x5, x18, lsl #2] // ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Contiguous store four structures from four vectors, scalar + scalar \\ 3 11 11 0.11 V1UnitL01[18],V1UnitS[18],V1UnitV[18]
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRB | stlrb w19, [x26] // STLRB <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRB | stlrb w9, [x19] // STLRB <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STLRH | stlrh w4, [x7] // STLRH <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
@@ -6977,40 +6980,40 @@ test:
# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRB | stlxrb w0, w1, [x20] // STLXRB <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRH | stlxrh w16, w17, [x21] // STLXRH <Ws>, <Wt>, [<Xn|SP>] \\ No description \\ No scheduling info
# CHECK-NEXT: 3 | 5 | 5 | 1.50 | V1UnitD, V1UnitL[2], V1UnitL01 | STLXRH | stlxrh w12, w26, [x23] // STLXRH <Ws>, <Wt>, [<Xn|SP>, #0] \\ No description \\ No scheduling info
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPSi | stnp s29, s16, [x11] // STNP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPSi | stnp s17, s19, [x27, #-40] // STNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPDi | stnp d4, d3, [x30] // STNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPDi | stnp d25, d31, [x28, #328] // STNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPQi | stnp q28, q22, [x3] // STNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STNPQi | stnp q17, q15, [x16, #656] // STNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STNPSi | stnp s29, s16, [x11] // STNP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STNPSi | stnp s17, s19, [x27, #-40] // STNP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STNPDi | stnp d4, d3, [x30] // STNP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STNPDi | stnp d25, d31, [x28, #328] // STNP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STNPQi | stnp q28, q22, [x3] // STNP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STNPQi | stnp q17, q15, [x16, #656] // STNP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPWi | stnp w29, w25, [x5] // STNP <Wt1>, <Wt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPWi | stnp w16, w18, [x27, #-232] // STNP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPXi | stnp x20, x16, [x8] // STNP <Xt1>, <Xt2>, [<Xn|SP>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STNPXi | stnp x6, x20, [x15, #-120] // STNP <Xt1>, <Xt2>, [<Xn|SP>, #<immd>] \\ Store pair, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRI | stnt1b { z18.b }, p7, [x21] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRI | stnt1b { z9.b }, p6, [x26, #-7, mul vl] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1B_ZRR | stnt1b { z18.b }, p1, [x1, x20] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRI | stnt1d { z16.d }, p3, [x3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRI | stnt1d { z27.d }, p4, [x16, #-6, mul vl] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1D_ZRR | stnt1d { z11.d }, p0, [x18, x22, lsl #3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1H_ZRI | stnt1h { z27.h }, p5, [x16] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1H_ZRI | stnt1h { z2.h }, p2, [x30, #-8, mul vl] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitV | STNT1H_ZRR | stnt1h { z0.h }, p1, [x7, x1, lsl #1] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRI | stnt1w { z9.s }, p3, [x20] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRI | stnt1w { z12.s }, p4, [x11, #-6, mul vl] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STNT1W_ZRR | stnt1w { z28.s }, p6, [x6, x0, lsl #2] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSpost | stp s10, s19, [x13], #76 // STP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Store vector pair, immed post-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDpost | stp d19, d20, [x30], #-144 // STP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Store vector pair, immed post-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | STPQpost | stp q3, q17, [x14], #-976 // STP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Store vector pair, immed post-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSpre | stp s19, s24, [x27, #-224]! // STP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Store vector pair, immed pre-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDpre | stp d16, d21, [x28, #168]! // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Store vector pair, immed pre-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitV[2], V1UnitV01[2] | STPQpre | stp q10, q31, [x0, #608]! // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Store vector pair, immed pre-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSi | stp s27, s11, [x30] // STP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDi | stp d30, d19, [x25] // STP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPQi | stp q25, q3, [x27] // STP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPSi | stp s29, s13, [x0, #-44] // STP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPDi | stp d15, d12, [x20, #-72] // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STPQi | stp q13, q16, [x3, #320] // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1B_ZRI | stnt1b { z18.b }, p7, [x21] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1B_ZRI | stnt1b { z9.b }, p6, [x26, #-7, mul vl] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1B_ZRR | stnt1b { z18.b }, p1, [x1, x20] // STNT1B { <Zt>.B }, <Pg>, [<Xn|SP>, <Xm>] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1D_ZRI | stnt1d { z16.d }, p3, [x3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1D_ZRI | stnt1d { z27.d }, p4, [x16, #-6, mul vl] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1D_ZRR | stnt1d { z11.d }, p0, [x18, x22, lsl #3] // STNT1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1H_ZRI | stnt1h { z27.h }, p5, [x16] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1H_ZRI | stnt1h { z2.h }, p2, [x30, #-8, mul vl] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitS, V1UnitSVE01, V1UnitV | STNT1H_ZRR | stnt1h { z0.h }, p1, [x7, x1, lsl #1] // STNT1H { <Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] \\ Non temporal store, scalar + scalar \\ 3 2 2 2.0 V1UnitL01,V1UnitS,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1W_ZRI | stnt1w { z9.s }, p3, [x20] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1W_ZRI | stnt1w { z12.s }, p4, [x11, #-6, mul vl] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, #<imm>, MUL VL] \\ Non temporal store, scalar + imm \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STNT1W_ZRR | stnt1w { z28.s }, p6, [x6, x0, lsl #2] // STNT1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] \\ Non temporal store, scalar + scalar \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPSpost | stp s10, s19, [x13], #76 // STP <St1>, <St2>, [<Xn|SP>], #<imm32> \\ Store vector pair, immed post-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPDpost | stp d19, d20, [x30], #-144 // STP <Dt1>, <Dt2>, [<Xn|SP>], #<imm64> \\ Store vector pair, immed post-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | STPQpost | stp q3, q17, [x14], #-976 // STP <Qt1>, <Qt2>, [<Xn|SP>], #<imm128> \\ Store vector pair, immed post-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPSpre | stp s19, s24, [x27, #-224]! // STP <St1>, <St2>, [<Xn|SP>, #<imm32>]! \\ Store vector pair, immed pre-index, S-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPDpre | stp d16, d21, [x28, #168]! // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>]! \\ Store vector pair, immed pre-index, D-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 1.00 | V1UnitI, V1UnitL[2], V1UnitL01[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | STPQpre | stp q10, q31, [x0, #608]! // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>]! \\ Store vector pair, immed pre-index, Q-form \\ 3 2 2 1.0 V1UnitI,V1UnitL01[2],V1UnitV01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPSi | stp s27, s11, [x30] // STP <St1>, <St2>, [<Xn|SP>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPDi | stp d30, d19, [x25] // STP <Dt1>, <Dt2>, [<Xn|SP>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPQi | stp q25, q3, [x27] // STP <Qt1>, <Qt2>, [<Xn|SP>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPSi | stp s29, s13, [x0, #-44] // STP <St1>, <St2>, [<Xn|SP>, #<imm32>] \\ Store vector pair, immed offset, S-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPDi | stp d15, d12, [x20, #-72] // STP <Dt1>, <Dt2>, [<Xn|SP>, #<imm64>] \\ Store vector pair, immed offset, D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STPQi | stp q13, q16, [x3, #320] // STP <Qt1>, <Qt2>, [<Xn|SP>, #<imm128>] \\ Store vector pair, immed offset, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPWpost | stp w18, w8, [x6], #196 // STP <Wt1>, <Wt2>, [<Xn|SP>], #<imms> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPXpost | stp x10, x17, [x7], #-328 // STP <Xt1>, <Xt2>, [<Xn|SP>], #<immd> \\ Store pair, immed post-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STPWpre | stp w4, w3, [x0, #-36]! // STP <Wt1>, <Wt2>, [<Xn|SP>, #<imms>]! \\ Store pair, immed pre-index \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
@@ -7027,26 +7030,26 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWui | str w28, [x2, #1796] // STR <Wt>, [<Xn|SP>, #<pimm32>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXui | str x22, [x29] // STR <Xt>, [<Xn|SP>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXui | str x2, [x10, #9472] // STR <Xt>, [<Xn|SP>, #<pimm64>] \\ Store register, unsigned immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBpost | str b21, [x28], #-62 // STR <Bt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHpost | str h13, [x10], #-194 // STR <Ht>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSpost | str s14, [x8], #166 // STR <St>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDpost | str d24, [x10], #134 // STR <Dt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQpost | str q20, [x30], #-108 // STR <Qt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBpre | str b9, [x24, #242]! // STR <Bt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHpre | str h0, [x4, #-193]! // STR <Ht>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSpre | str s19, [x23, #115]! // STR <St>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDpre | str d20, [x2, #-30]! // STR <Dt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQpre | str q24, [x20, #62]! // STR <Qt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBui | str b5, [x11] // STR <Bt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBui | str b20, [x23, #2409] // STR <Bt>, [<Xn|SP>, #<pimm8>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHui | str h23, [x15] // STR <Ht>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHui | str h24, [x6, #492] // STR <Ht>, [<Xn|SP>, #<pimm16>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSui | str s25, [x19] // STR <St>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSui | str s2, [x14, #984] // STR <St>, [<Xn|SP>, #<pimm32>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDui | str d15, [x2] // STR <Dt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDui | str d27, [x7, #25704] // STR <Dt>, [<Xn|SP>, #<pimm64>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQui | str q13, [x16] // STR <Qt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQui | str q4, [x7, #96] // STR <Qt>, [<Xn|SP>, #<pimm128>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBpost | str b21, [x28], #-62 // STR <Bt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHpost | str h13, [x10], #-194 // STR <Ht>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSpost | str s14, [x8], #166 // STR <St>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDpost | str d24, [x10], #134 // STR <Dt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQpost | str q20, [x30], #-108 // STR <Qt>, [<Xn|SP>], #<simm> \\ Store vector reg, unsigned immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBpre | str b9, [x24, #242]! // STR <Bt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHpre | str h0, [x4, #-193]! // STR <Ht>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSpre | str s19, [x23, #115]! // STR <St>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDpre | str d20, [x2, #-30]! // STR <Dt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQpre | str q24, [x20, #62]! // STR <Qt>, [<Xn|SP>, #<simm>]! \\ Store vector reg, immed post-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBui | str b5, [x11] // STR <Bt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBui | str b20, [x23, #2409] // STR <Bt>, [<Xn|SP>, #<pimm8>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHui | str h23, [x15] // STR <Ht>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHui | str h24, [x6, #492] // STR <Ht>, [<Xn|SP>, #<pimm16>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSui | str s25, [x19] // STR <St>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSui | str s2, [x14, #984] // STR <St>, [<Xn|SP>, #<pimm32>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDui | str d15, [x2] // STR <Dt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDui | str d27, [x7, #25704] // STR <Dt>, [<Xn|SP>, #<pimm64>] \\ Store vector reg, immed pre-index, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQui | str q13, [x16] // STR <Qt>, [<Xn|SP>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQui | str q4, [x7, #96] // STR <Qt>, [<Xn|SP>, #<pimm128>] \\ Store vector reg, immed pre-index, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitL, V1UnitL01 | STR_PXI | str p4, [x5] // STR <Pt>, [<Xn|SP>] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitL, V1UnitL01 | STR_PXI | str p3, [x21, #-78, mul vl] // STR <Pt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from predicate reg \\ 1 1 1 2.0 V1UnitL01
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w14, [x9, x17] // STR <Wt>, [<Xn|SP>, <Xm>] \\ Store register, register offset, basic \\ 2 1 1 2.0 V1UnitL01,V1UnitD
@@ -7065,44 +7068,44 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x20, [x4, x2, sxtx #3] // STR <Xt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store register, register offset, extend, scale by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRWroX | str w8, [x11, x10, lsl #2] // STR <Wt>, [<Xn|SP>, <Xm>, LSL #2] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRXroX | str x27, [x2, x11, lsl #3] // STR <Xt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store register, register offset, scaled by 4/8 \\ 2 1 1 2.0 V1UnitL01,V1UnitD
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroX | str b14, [x13, x25] // STR <Bt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroW | str b30, [x16, w26, uxtw] // STR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroW | str b20, [x19, w3, sxtw] // STR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRBroX | str b13, [x29, x19, sxtx] // STR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h16, [x5, x24] // STR <Ht>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h15, [x15, w15, uxtw] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h3, [x6, w15, sxtw] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h2, [x1, x28, sxtx] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h30, [x29, w30, uxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroW | str h10, [x21, w11, sxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h0, [x15, x9, sxtx #1] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRHroX | str h13, [x0, x26, lsl #1] // STR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Store vector reg, register offset, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s2, [x16, x17] // STR <St>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s20, [x24, w10, uxtw] // STR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s4, [x9, w14, sxtw] // STR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s3, [x23, x26, sxtx] // STR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s27, [x17, w9, uxtw #2] // STR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroW | str s0, [x11, w20, sxtw #2] // STR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s20, [x17, x14, sxtx #2] // STR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRSroX | str s0, [x15, x28, lsl #2] // STR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d5, [x26, x6] // STR <Dt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d11, [x9, w5, uxtw] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d16, [x20, w8, sxtw] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d0, [x12, x9, sxtx] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d4, [x21, w25, uxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroW | str d28, [x20, w4, sxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d20, [x13, x23, sxtx #3] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRDroX | str d31, [x19, x28, lsl #3] // STR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q13, [x24, x1] // STR <Qt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q1, [x25, w9, uxtw] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q25, [x20, w15, sxtw] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q25, [x0, x15, sxtx] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q6, [x13, w0, uxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroW | str q27, [x4, w15, sxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q3, [x23, x0, sxtx #4] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STRQroX | str q27, [x1, x28, lsl #4] // STR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Store vector reg, register offset, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STR_ZXI | str z3, [x0] // STR <Zt>, [<Xn|SP>] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV | STR_ZXI | str z8, [x6, #188, mul vl] // STR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBroX | str b14, [x13, x25] // STR <Bt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBroW | str b30, [x16, w26, uxtw] // STR <Bt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBroW | str b20, [x19, w3, sxtw] // STR <Bt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRBroX | str b13, [x29, x19, sxtx] // STR <Bt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroX | str h16, [x5, x24] // STR <Ht>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroW | str h15, [x15, w15, uxtw] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroW | str h3, [x6, w15, sxtw] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroX | str h2, [x1, x28, sxtx] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroW | str h30, [x29, w30, uxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, UXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroW | str h10, [x21, w11, sxtw #1] // STR <Ht>, [<Xn|SP>, <Wm>, SXTW #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroX | str h0, [x15, x9, sxtx #1] // STR <Ht>, [<Xn|SP>, <Xm>, SXTX #1] \\ Store vector reg, register offset, extend, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRHroX | str h13, [x0, x26, lsl #1] // STR <Ht>, [<Xn|SP>, <Xm>, LSL #1] \\ Store vector reg, register offset, scale, H-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroX | str s2, [x16, x17] // STR <St>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroW | str s20, [x24, w10, uxtw] // STR <St>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroW | str s4, [x9, w14, sxtw] // STR <St>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroX | str s3, [x23, x26, sxtx] // STR <St>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroW | str s27, [x17, w9, uxtw #2] // STR <St>, [<Xn|SP>, <Wm>, UXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroW | str s0, [x11, w20, sxtw #2] // STR <St>, [<Xn|SP>, <Wm>, SXTW #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroX | str s20, [x17, x14, sxtx #2] // STR <St>, [<Xn|SP>, <Xm>, SXTX #2] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRSroX | str s0, [x15, x28, lsl #2] // STR <St>, [<Xn|SP>, <Xm>, LSL #2] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroX | str d5, [x26, x6] // STR <Dt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroW | str d11, [x9, w5, uxtw] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroW | str d16, [x20, w8, sxtw] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroX | str d0, [x12, x9, sxtx] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroW | str d4, [x21, w25, uxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, UXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroW | str d28, [x20, w4, sxtw #3] // STR <Dt>, [<Xn|SP>, <Wm>, SXTW #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroX | str d20, [x13, x23, sxtx #3] // STR <Dt>, [<Xn|SP>, <Xm>, SXTX #3] \\ Store vector reg, register offset, extend, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRDroX | str d31, [x19, x28, lsl #3] // STR <Dt>, [<Xn|SP>, <Xm>, LSL #3] \\ Store vector reg, register offset, scale, S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroX | str q13, [x24, x1] // STR <Qt>, [<Xn|SP>, <Xm>] \\ Store vector reg, register offset, basic, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroW | str q1, [x25, w9, uxtw] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroW | str q25, [x20, w15, sxtw] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroX | str q25, [x0, x15, sxtx] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX] \\ Store vector reg, register offset, extend, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroW | str q6, [x13, w0, uxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, UXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroW | str q27, [x4, w15, sxtw #4] // STR <Qt>, [<Xn|SP>, <Wm>, SXTW #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroX | str q3, [x23, x0, sxtx #4] // STR <Qt>, [<Xn|SP>, <Xm>, SXTX #4] \\ Store vector reg, register offset, extend, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 3 | 2 | 2 | 2.00 | V1UnitI, V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STRQroX | str q27, [x1, x28, lsl #4] // STR <Qt>, [<Xn|SP>, <Xm>, LSL #4] \\ Store vector reg, register offset, scale, Q-form \\ 3 2 2 2.0 V1UnitI,V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STR_ZXI | str z3, [x0] // STR <Zt>, [<Xn|SP>] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV | STR_ZXI | str z8, [x6, #188, mul vl] // STR <Zt>, [<Xn|SP>, #<imm>, MUL VL] \\ Store from vector reg \\ 2 2 2 2.0 V1UnitL01,V1UnitV
# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRBBpost | strb w23, [x11], #34 // STRB <Wt>, [<Xn|SP>], #<simm> \\ Store, immed post-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
# CHECK-NEXT: 3 | 1 | 1 | 2.00 | V1UnitD, V1UnitI, V1UnitL, V1UnitL01 | STRBBpre | strb w5, [x19, #-175]! // STRB <Wt>, [<Xn|SP>, #<simm>]! \\ Store, immed pre-indexed \\ 3 1 1 2.0 V1UnitL01,V1UnitD,V1UnitI
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STRBBui | strb w18, [x30] // STRB <Wt>, [<Xn|SP>] \\ Store, immed offset \\ 2 1 1 2.0 V1UnitL01,V1UnitD
@@ -7131,16 +7134,16 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRBi | sttrb w0, [x20, #-114] // STTRB <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRHi | sttrh w26, [x11] // STTRH <Wt>, [<Xn|SP>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STTRHi | sttrh w11, [x30, #-78] // STTRH <Wt>, [<Xn|SP>, #<simm>] \\ Store register, immed unprivileged \\ 2 1 1 2.0 V1UnitL01,V1UnitD
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURBi | stur b29, [x8] // STUR <Bt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURBi | stur b5, [x0, #80] // STUR <Bt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x15] // STUR <Ht>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x12, #-227] // STUR <Ht>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURSi | stur s10, [x4] // STUR <St>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURSi | stur s9, [x14, #21] // STUR <St>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURDi | stur d1, [x28] // STUR <Dt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURDi | stur d6, [x6, #188] // STUR <Dt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURQi | stur q6, [x16] // STUR <Qt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
-# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitV, V1UnitV01 | STURQi | stur q5, [x13, #-253] // STUR <Qt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURBi | stur b29, [x8] // STUR <Bt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURBi | stur b5, [x0, #80] // STUR <Bt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x15] // STUR <Ht>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURHi | stur h10, [x12, #-227] // STUR <Ht>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURSi | stur s10, [x4] // STUR <St>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURSi | stur s9, [x14, #21] // STUR <St>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURDi | stur d1, [x28] // STUR <Dt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURDi | stur d6, [x6, #188] // STUR <Dt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, B/H/S/D-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURQi | stur q6, [x16] // STUR <Qt>, [<Xn|SP>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitL, V1UnitL01, V1UnitSVE01, V1UnitV, V1UnitV01 | STURQi | stur q5, [x13, #-253] // STUR <Qt>, [<Xn|SP>, #<simm>] \\ Store vector reg, unscaled immed, Q-form \\ 2 2 2 2.0 V1UnitL01,V1UnitV01
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURWi | stur w29, [x27] // STUR <Wt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURWi | stur w14, [x2, #-34] // STUR <Wt>, [<Xn|SP>, #<simm>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
# CHECK-NEXT: 2 | 1 | 1 | 2.00 | V1UnitD, V1UnitL, V1UnitL01 | STURXi | stur x29, [x10] // STUR <Xt>, [<Xn|SP>] \\ Store register, unscaled immed \\ 2 1 1 2.0 V1UnitL01,V1UnitD
@@ -7173,25 +7176,25 @@ test:
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWri | sub wsp, wsp, #84, lsl #12 // SUB <Wd|WSP>, <Wn|WSP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXri | sub x18, x22, #36 // SUB <Xd|SP>, <Xn|SP>, #<imm> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXri | sub x17, x20, #184 // SUB <Xd|SP>, <Xn|SP>, #<imm>, <shift> \\ ALU, basic \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_B | sub z18.b, z18.b, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_S | sub z22.s, z22.s, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZI_H | sub z15.h, z15.h, #50176 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUB_ZI_B | sub z18.b, z18.b, #117 // SUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUB_ZI_S | sub z22.s, z22.s, #4 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUB_ZI_H | sub z15.h, z15.h, #50176 // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBWrs | sub w0, w21, w2, lsl #4 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamountl> \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | sub w22, w7, w13, lsl #19 // SUB <Wd>, <Wn>, <Wm>, LSL #<wamounth> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBWrs | sub w1, w18, w16, asr #4 // SUB <Wd>, <Wn>, <Wm>, <shift> #<wamount> \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SUBXrs | sub x27, x29, x16, lsl #1 // SUB <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | sub x24, x10, x15, lsl #35 // SUB <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | SUBXrs | sub x24, x19, x13, lsr #20 // SUB <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBv1i64 | sub d18, d25, d0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBv2i32 | sub v15.2s, v14.2s, v11.2s // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZPmZ_H | sub z18.h, p4/m, z18.h, z7.h // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUB_ZZZ_B | sub z29.b, z19.b, z8.b // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBHNv4i32_v4i16 | subhn v7.4h, v10.4s, v13.4s // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUBHNv2i64_v4i32 | subhn2 v24.4s, v24.2d, v8.2d // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_B | subr z13.b, z13.b, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_S | subr z17.s, z17.s, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZI_D | subr z15.d, z15.d, #100 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUBR_ZPmZ_D | subr z21.d, p7/m, z21.d, z24.d // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SUBv1i64 | sub d18, d25, d0 // SUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SUBv2i32 | sub v15.2s, v14.2s, v11.2s // SUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUB_ZPmZ_H | sub z18.h, p4/m, z18.h, z7.h // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUB_ZZZ_B | sub z29.b, z19.b, z8.b // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SUBHNv4i32_v4i16 | subhn v7.4h, v10.4s, v13.4s // SUBHN <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SUBHNv2i64_v4i32 | subhn2 v24.4s, v24.2d, v8.2d // SUBHN2 <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUBR_ZI_B | subr z13.b, z13.b, #229 // SUBR <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUBR_ZI_S | subr z17.s, z17.s, #140 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUBR_ZI_D | subr z15.d, z15.d, #100 // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUBR_ZPmZ_D | subr z21.d, p7/m, z21.d, z24.d // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w25, wsp, w13 // SUBS <Wd>, <Wn|WSP>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w10, wsp, w9, uxth // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSWrx | subs w20, wsp, w3, sxth #2 // SUBS <Wd>, <Wn|WSP>, <Wm>, <wextend> #<amount> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
@@ -7212,72 +7215,72 @@ test:
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | SUBSXrs | subs x18, x1, x5 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amountl> \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | subs x28, x26, x4, lsl #49 // SUBS <Xd>, <Xn>, <Xm>, LSL #<amounth> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | SUBSXrs | subs x26, x14, x30, lsr #35 // SUBS <Xd>, <Xn>, <Xm>, <shift> #<amount> \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | SUDOTlanev8i8 | sudot v4.2s, v20.8b, v18.4b[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV[2] | SUDOT_ZZZI | sudot z5.s, z30.b, z3.b[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUNPKHI_ZZ_D | sunpkhi z22.d, z16.s // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | SUNPKLO_ZZ_H | sunpklo z10.h, z0.b // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUQADDv1i8 | suqadd b15, b21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | SUQADDv16i8 | suqadd v26.16b, v27.16b // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | SUDOTlanev8i8 | sudot v4.2s, v20.8b, v18.4b[2] // SUDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUDOT_ZZZI | sudot z5.s, z30.b, z3.b[1] // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUNPKHI_ZZ_D | sunpkhi z22.d, z16.s // SUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | SUNPKLO_ZZ_H | sunpklo z10.h, z0.b // SUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SUQADDv1i8 | suqadd b15, b21 // SUQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | SUQADDv16i8 | suqadd v26.16b, v27.16b // SUQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SVC | svc #0x89cb // SVC #<imm> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sxtb w7, w20 // SXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxtb x18, w14 // SXTB <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTB_ZPmZ_H | sxtb z16.h, p5/m, z15.h // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTH_ZPmZ_S | sxth z4.s, p7/m, z11.s // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | SXTW_ZPmZ_D | sxtw z12.d, p1/m, z16.d // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | SXTB_ZPmZ_H | sxtb z16.h, p5/m, z15.h // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | SXTH_ZPmZ_S | sxth z4.s, p7/m, z11.s // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | SXTW_ZPmZ_D | sxtw z12.d, p1/m, z16.d // SXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMWri | sxth w23, w2 // SXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxth x22, w17 // SXTH <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v4.8h, v21.8b, #0 // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v20.2d, v30.4s, #0 // SXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv8i8_shift | sshll v4.8h, v21.8b, #0 // SXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | SSHLLv4i32_shift | sshll2 v20.2d, v30.4s, #0 // SXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | SBFMXri | sxtw x18, w22 // SXTW <Xd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | sys #6, c6, c0, #3 // SYS #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | sys #7, c12, c5, #3, x8 // SYS #<op1>, <Cn>, <Cm>, #<op2>, <Xt> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSLxt | sysl x16, #5, c11, c8, #5 // SYSL <Xt>, #<op1>, <Cn>, <Cm>, #<op2> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | TBLv8i8Two | tbl v7.8b, { v2.16b, v3.16b }, v17.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBLv16i8Three | tbl v3.16b, { v10.16b, v11.16b, v12.16b }, v29.16b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 3 table regs \\ 1 4 4 1.0 V1UnitV01[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.67 | V1UnitV[3], V1UnitV01[3] | TBLv8i8Four | tbl v9.8b, { v22.16b, v23.16b, v24.16b, v25.16b }, v14.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 4 table regs \\ 1 4 4 0.67 V1UnitV01[3]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | TBLv16i8One | tbl v29.16b, { v3.16b }, v17.16b // TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | TBLv8i8Two | tbl v7.8b, { v2.16b, v3.16b }, v17.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | TBLv16i8Three | tbl v3.16b, { v10.16b, v11.16b, v12.16b }, v29.16b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 3 table regs \\ 1 4 4 1.0 V1UnitV01[2]
+# CHECK-NEXT: 1 | 4 | 4 | 0.67 | V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | TBLv8i8Four | tbl v9.8b, { v22.16b, v23.16b, v24.16b, v25.16b }, v14.8b // TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 4 table regs \\ 1 4 4 0.67 V1UnitV01[3]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | TBLv16i8One | tbl v29.16b, { v3.16b }, v17.16b // TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup, 1 or 2 table regs \\ 1 2 2 2.0 V1UnitV01
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBNZW | tbnz w3, #28, test // TBNZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBNZX | tbnz x30, #48, test // TBNZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV01[2] | TBXv8i8Two | tbx v25.8b, { v13.16b, v14.16b }, v30.8b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 2 table reg \\ 1 4 4 1.0 V1UnitV01[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.67 | V1UnitV[3], V1UnitV01[3] | TBXv16i8Three | tbx v22.16b, { v3.16b, v4.16b, v5.16b }, v25.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 3 table reg \\ 1 6 6 0.67 V1UnitV01[3]
-# CHECK-NEXT: 1 | 6 | 6 | 0.40 | V1UnitV[5], V1UnitV01[5] | TBXv16i8Four | tbx v23.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v26.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 4 table reg \\ 1 6 6 0.4 V1UnitV01[5]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | TBXv8i8One | tbx v16.8b, { v21.16b }, v18.8b // TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 1 table reg \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE01[2], V1UnitV[2], V1UnitV01[2] | TBXv8i8Two | tbx v25.8b, { v13.16b, v14.16b }, v30.8b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 2 table reg \\ 1 4 4 1.0 V1UnitV01[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.67 | V1UnitSVE01[3], V1UnitV[3], V1UnitV01[3] | TBXv16i8Three | tbx v22.16b, { v3.16b, v4.16b, v5.16b }, v25.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 3 table reg \\ 1 6 6 0.67 V1UnitV01[3]
+# CHECK-NEXT: 1 | 6 | 6 | 0.40 | V1UnitSVE01[5], V1UnitV[5], V1UnitV01[5] | TBXv16i8Four | tbx v23.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v26.16b // TBX <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 4 table reg \\ 1 6 6 0.4 V1UnitV01[5]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE01, V1UnitV, V1UnitV01 | TBXv8i8One | tbx v16.8b, { v21.16b }, v18.8b // TBX <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta> \\ ASIMD table lookup extension, 1 table reg \\ 1 2 2 2.0 V1UnitV01
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBZW | tbz w17, #16, test // TBZ W<t>, #<imms>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 2.00 | V1UnitB | TBZX | tbz x22, #41, test // TBZ X<t>, #<immd>, <label> \\ Compare and branch \\ 1 1 1 2.0 V1UnitB
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | tlbi vmalle1 // TLBI <tlbi_op> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | SYSxt | tlbi ipas2e1is, x7 // TLBI <tlbi_op2>, <Xt> \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | TRN1v2i32 | trn1 v30.2s, v21.2s, v25.2s // TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | TRN1v2i32 | trn1 v30.2s, v21.2s, v25.2s // TRN1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | TRN1_PPP_S | trn1 p1.s, p4.s, p0.s // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | TRN2_PPP_H | trn2 p0.h, p5.h, p7.h // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate transpose \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | TRN2v2i64 | trn2 v27.2d, v29.2d, v10.2d // TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | TRN2v2i64 | trn2 v27.2d, v29.2d, v10.2d // TRN2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD transpose \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWri | tst w25, #0xe00 // TST <Wn>, #<imms> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXri | tst x3, #0x1e00 // TST <Xn>, #<immd> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSWrs | tst w9, w14 // TST <Wn>, <Wm> \\ ALU, basic, unconditional, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSWrs | tst w10, w3, asr #16 // TST <Wn>, <Wm>, <shift> #<wamount> \\ Test/Compare, shift by immed \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
# CHECK-NEXT: 2 | 1 | 1 | 3.00 | V1UnitFlg, V1UnitI | ANDSXrs | tst x11, x28 // TST <Xn>, <Xm> \\ ALU, basic, flagset \\ 2 1 1 3.0 V1UnitI,V1UnitFlg
# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitFlg, V1UnitI, V1UnitM | ANDSXrs | tst x9, x7, asr #33 // TST <Xn>, <Xm>, <shift> #<amount> \\ Logical, shift, flagset \\ 2 2 2 2.0 V1UnitM,V1UnitFlg
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABAv16i8 | uaba v13.16b, v14.16b, v19.16b // UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABALv2i32_v2i64 | uabal v13.2d, v16.2s, v11.2s // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UABALv8i16_v4i32 | uabal2 v17.4s, v0.8h, v1.8h // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDv4i32 | uabd v23.4s, v4.4s, v30.4s // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UABD_ZPmZ_B | uabd z5.b, p5/m, z5.b, z10.b // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDLv4i16_v4i32 | uabdl v13.4s, v26.4h, v7.4h // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UABDLv4i32_v2i64 | uabdl2 v15.2d, v9.4s, v10.4s // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | UADALPv2i32_v1i64 | uadalp v31.1d, v14.2s // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLv8i8_v8i16 | uaddl v29.8h, v8.8b, v31.8b // UADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLv8i16_v4i32 | uaddl2 v15.4s, v22.8h, v14.8h // UADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDLPv2i32_v1i64 | uaddlp v15.1d, v5.2s // UADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv8i8v | uaddlv h24, v24.8b // UADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UADDLVv16i8v | uaddlv h19, v31.16b // UADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv4i16v | uaddlv s12, v24.4h // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv8i16v | uaddlv s30, v0.8h // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UADDLVv4i32v | uaddlv d6, v19.4s // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_B | uaddv d9, p5, z1.b // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_H | uaddv d26, p0, z25.h // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_S | uaddv d4, p1, z1.s // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UADDV_VPZ_D | uaddv d28, p6, z6.d // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDWv2i32_v2i64 | uaddw v17.2d, v9.2d, v12.2s // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UADDWv8i16_v4i32 | uaddw2 v15.4s, v13.4s, v4.8h // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UABAv16i8 | uaba v13.16b, v14.16b, v19.16b // UABA <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff accum \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UABALv2i32_v2i64 | uabal v13.2d, v16.2s, v11.2s // UABAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UABALv8i16_v4i32 | uabal2 v17.4s, v0.8h, v1.8h // UABAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff accum long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UABDv4i32 | uabd v23.4s, v4.4s, v30.4s // UABD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD absolute diff \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UABD_ZPmZ_B | uabd z5.b, p5/m, z5.b, z10.b // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UABDLv4i16_v4i32 | uabdl v13.4s, v26.4h, v7.4h // UABDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UABDLv4i32_v2i64 | uabdl2 v15.2d, v9.4s, v10.4s // UABDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD absolute diff long \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UADALPv2i32_v1i64 | uadalp v31.1d, v14.2s // UADALP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD pairwise add and accumulate long \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UADDLv8i8_v8i16 | uaddl v29.8h, v8.8b, v31.8b // UADDL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UADDLv8i16_v4i32 | uaddl2 v15.4s, v22.8h, v14.8h // UADDL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UADDLPv2i32_v1i64 | uaddlp v15.1d, v5.2s // UADDLP <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UADDLVv8i8v | uaddlv h24, v24.8b // UADDLV H<d>, <Vn>.8B \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | UADDLVv16i8v | uaddlv h19, v31.16b // UADDLV H<d>, <Vn>.16B \\ ASIMD arith, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UADDLVv4i16v | uaddlv s12, v24.4h // UADDLV S<d>, <Vn>.4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UADDLVv8i16v | uaddlv s30, v0.8h // UADDLV S<d>, <Vn>.8H \\ ASIMD arith, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UADDLVv4i32v | uaddlv d6, v19.4s // UADDLV D<d>, <Vn>.4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 14 | 14 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UADDV_VPZ_B | uaddv d9, p5, z1.b // UADDV <Dd>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UADDV_VPZ_H | uaddv d26, p0, z25.h // UADDV <Dd>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UADDV_VPZ_S | uaddv d4, p1, z1.s // UADDV <Dd>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UADDV_VPZ_D | uaddv d28, p6, z6.d // UADDV <Dd>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UADDWv2i32_v2i64 | uaddw v17.2d, v9.2d, v12.2s // UADDW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UADDWv8i16_v4i32 | uaddw2 v15.4s, v13.4s, v4.8h // UADDW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | ubfiz w11, w6, #30, #1 // UBFIZ <Wd>, <Wn>, #<lsbs>, #<widths> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMXri | ubfiz x27, x15, #49, #9 // UBFIZ <Xd>, <Xn>, #<lsbd>, #<widthd> \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | lsl w19, w16, #7 // UBFM <Wd>, <Wn>, #<immrs>, #<immss> \\ Bitfield move, basic \\ 1 1 1 4.0 V1UnitI
@@ -7296,112 +7299,112 @@ test:
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXHri | ucvtf h17, x12 // UCVTF <Hd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXSri | ucvtf s8, x0 // UCVTF <Sd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UCVTFUXDri | ucvtf d22, x17 // UCVTF <Dd>, <Xn> \\ FP convert, from gen to vec reg \\ 1 3 3 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFh | ucvtf h22, h16, #11 // UCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFs | ucvtf s17, s18, #18 // UCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFd | ucvtf d19, d1, #2 // UCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4i16_shift | ucvtf v18.4h, v11.4h, #7 // UCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | UCVTFv8i16_shift | ucvtf v22.8h, v20.8h, #10 // UCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2i32_shift | ucvtf v16.2s, v17.2s, #11 // UCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4i32_shift | ucvtf v17.4s, v23.4s, #2 // UCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2i64_shift | ucvtf v18.2d, v20.2d, #60 // UCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i16 | ucvtf h7, h21 // UCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i32 | ucvtf s25, s7 // UCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv1i64 | ucvtf d30, d29 // UCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4f16 | ucvtf v9.4h, v25.4h // UCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitV[4], V1UnitV02[4] | UCVTFv8f16 | ucvtf v24.8h, v31.8h // UCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2f32 | ucvtf v14.2s, v2.2s // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | UCVTFv4f32 | ucvtf v20.4s, v0.4s // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UCVTFv2f64 | ucvtf v27.2d, v3.2d // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 6 | 6 | 0.25 | V1UnitV[4], V1UnitV0[4], V1UnitV01[4], V1UnitV02[4] | UCVTF_ZPmZ_HtoH | ucvtf z31.h, p5/m, z30.h // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 1 6 6 0.25 V1UnitV0[4]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoH | ucvtf z23.h, p7/m, z9.s // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoS | ucvtf z1.s, p1/m, z10.s // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 4 | 4 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UCVTF_ZPmZ_StoD | ucvtf z24.d, p5/m, z9.s // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 1 4 4 0.5 V1UnitV0[2]
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoH | ucvtf z30.h, p2/m, z24.d // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoS | ucvtf z9.s, p5/m, z9.d // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 3 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UCVTF_ZPmZ_DtoD | ucvtf z18.d, p6/m, z19.d // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 1 3 3 1.0 V1UnitV0
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFh | ucvtf h22, h16, #11 // UCVTF H<d>, H<n>, #<hfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFs | ucvtf s17, s18, #18 // UCVTF S<d>, S<n>, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFd | ucvtf d19, d1, #2 // UCVTF D<d>, D<n>, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTFv4i16_shift | ucvtf v18.4h, v11.4h, #7 // UCVTF <Vd>.4H, <Vn>.4H, #<hfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | UCVTFv8i16_shift | ucvtf v22.8h, v20.8h, #10 // UCVTF <Vd>.8H, <Vn>.8H, #<hfbits> \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv2i32_shift | ucvtf v16.2s, v17.2s, #11 // UCVTF <Vd>.2S, <Vn>.2S, #<sfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTFv4i32_shift | ucvtf v17.4s, v23.4s, #2 // UCVTF <Vd>.4S, <Vn>.4S, #<sfbits> \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv2i64_shift | ucvtf v18.2d, v20.2d, #60 // UCVTF <Vd>.2D, <Vn>.2D, #<dfbits> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv1i16 | ucvtf h7, h21 // UCVTF <Hd>, <Hn> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv1i32 | ucvtf s25, s7 // UCVTF S<d>, S<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv1i64 | ucvtf d30, d29 // UCVTF D<d>, D<n> \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTFv4f16 | ucvtf v9.4h, v25.4h // UCVTF <Vd>.4H, <Vn>.4H \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 6 | 6 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | UCVTFv8f16 | ucvtf v24.8h, v31.8h // UCVTF <Vd>.8H, <Vn>.8H \\ ASIMD FP convert, other, Q-form F16 \\ 1 6 6 0.5 V1UnitV02[4]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv2f32 | ucvtf v14.2s, v2.2s // UCVTF <Vd>.2S, <Vn>.2S \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTFv4f32 | ucvtf v20.4s, v0.4s // UCVTF <Vd>.4S, <Vn>.4S \\ ASIMD FP convert, other, D-form F16 and Q-form F32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UCVTFv2f64 | ucvtf v27.2d, v3.2d // UCVTF <Vd>.2D, <Vn>.2D \\ ASIMD FP convert, other, D-form F32 and Q-form F64 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 6 | 6 | 0.25 | V1UnitSVE0[8], V1UnitSVE01[8], V1UnitV[8], V1UnitV02[8] | UCVTF_ZPmZ_HtoH | ucvtf z31.h, p5/m, z30.h // UCVTF <Zd>.H, <Pg>/M, <Zn>.H \\ Convert to floating point, 16b to half \\ 2 6 6 0.25 V1UnitSVE0[8],V1UnitSVE0[8]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | UCVTF_ZPmZ_StoH | ucvtf z23.h, p7/m, z9.s // UCVTF <Zd>.H, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | UCVTF_ZPmZ_StoS | ucvtf z1.s, p1/m, z10.s // UCVTF <Zd>.S, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 4 | 4 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | UCVTF_ZPmZ_StoD | ucvtf z24.d, p5/m, z9.s // UCVTF <Zd>.D, <Pg>/M, <Zn>.S \\ Convert to floating point, 32b to single or half \\ 2 4 4 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTF_ZPmZ_DtoH | ucvtf z30.h, p2/m, z24.d // UCVTF <Zd>.H, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTF_ZPmZ_DtoS | ucvtf z9.s, p5/m, z9.d // UCVTF <Zd>.S, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 3 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UCVTF_ZPmZ_DtoD | ucvtf z18.d, p6/m, z19.d // UCVTF <Zd>.D, <Pg>/M, <Zn>.D \\ Convert to floating point, 64b to float or convert to double \\ 2 3 3 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
# CHECK-NEXT: 1 | 12 | 12 | 0.08 | V1UnitI[12], V1UnitM[12], V1UnitM0[12] | UDIVWr | udiv w12, w17, w22 // UDIV <Wd>, <Wn>, <Wm> \\ Divide, W-form \\ 1 12 12 0.08 V1UnitM0[12]
# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitI[20], V1UnitM[20], V1UnitM0[20] | UDIVXr | udiv x7, x2, x23 // UDIV <Xd>, <Xn>, <Xm> \\ Divide, X-form \\ 1 20 20 0.05 V1UnitM0[20]
-# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | UDIV_ZPmZ_S | udiv z30.s, p5/m, z30.s, z10.s // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
-# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | UDIV_ZPmZ_D | udiv z31.d, p5/m, z31.d, z29.d // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
-# CHECK-NEXT: 1 | 12 | 12 | 0.09 | V1UnitV[11], V1UnitV0[11], V1UnitV01[11], V1UnitV02[11] | UDIVR_ZPmZ_S | udivr z19.s, p4/m, z19.s, z8.s // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 1 12 12 0.09 V1UnitV0[11]
-# CHECK-NEXT: 1 | 20 | 20 | 0.05 | V1UnitV[20], V1UnitV0[20], V1UnitV01[20], V1UnitV02[20] | UDIVR_ZPmZ_D | udivr z3.d, p5/m, z3.d, z8.d // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 1 20 20 0.05 V1UnitV0[20]
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | UDOT_ZZZI_S | udot z0.s, z5.b, z4.b[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UDOT_ZZZI_D | udot z19.d, z1.h, z13.h[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV, V1UnitV01 | UDOT_ZZZ_S | udot z22.s, z29.b, z4.b // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 1 3 1 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 1 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UDOT_ZZZ_D | udot z9.d, z1.h, z11.h // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 1 4 1 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTlanev8i8 | udot v10.2s, v11.8b, v21.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTlanev16i8 | udot v7.4s, v21.16b, v6.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UDOTv8i8 | udot v19.2s, v31.8b, v17.8b // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UHADDv8i16 | uhadd v10.8h, v7.8h, v7.8h // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UHSUBv4i16 | uhsub v12.4h, v16.4h, v28.4h // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 12 | 12 | 0.09 | V1UnitSVE0[22], V1UnitSVE01[22], V1UnitV[22], V1UnitV02[22] | UDIV_ZPmZ_S | udiv z30.s, p5/m, z30.s, z10.s // UDIV <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+# CHECK-NEXT: 2 | 20 | 20 | 0.05 | V1UnitSVE0[40], V1UnitSVE01[40], V1UnitV[40], V1UnitV02[40] | UDIV_ZPmZ_D | udiv z31.d, p5/m, z31.d, z29.d // UDIV <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+# CHECK-NEXT: 2 | 12 | 12 | 0.09 | V1UnitSVE0[22], V1UnitSVE01[22], V1UnitV[22], V1UnitV02[22] | UDIVR_ZPmZ_S | udivr z19.s, p4/m, z19.s, z8.s // UDIVR <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Divides, 32 bit \\ 2 12 12 0.09 V1UnitSVE0[22],V1UnitSVE0[22]
+# CHECK-NEXT: 2 | 20 | 20 | 0.05 | V1UnitSVE0[40], V1UnitSVE01[40], V1UnitV[40], V1UnitV02[40] | UDIVR_ZPmZ_D | udivr z3.d, p5/m, z3.d, z8.d // UDIVR <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Divides, 64 bit \\ 2 20 20 0.05 V1UnitSVE0[40],V1UnitSVE0[40]
+# CHECK-NEXT: 2 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UDOT_ZZZI_S | udot z0.s, z5.b, z4.b[1] // UDOT <Zda>.S, <Zn>.B, <Zms>.B[<imms>] \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 1 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UDOT_ZZZI_D | udot z19.d, z1.h, z13.h[1] // UDOT <Zda>.D, <Zn>.H, <Zmd>.H[<immd>] \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UDOT_ZZZ_S | udot z22.s, z29.b, z4.b // UDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit \\ 2 3 1 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 4 | 1 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UDOT_ZZZ_D | udot z9.d, z1.h, z11.h // UDOT <Zda>.D, <Zn>.H, <Zm>.H \\ Dot product, 16 bit \\ 2 4 1 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | UDOTlanev8i8 | udot v10.2s, v11.8b, v21.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | UDOTlanev16i8 | udot v7.4s, v21.16b, v6.4b[3] // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | UDOTv8i8 | udot v19.2s, v31.8b, v17.8b // UDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UHADDv8i16 | uhadd v10.8h, v7.8h, v7.8h // UHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UHSUBv4i16 | uhsub v12.4h, v16.4h, v28.4h // UHSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UMADDLrrr | umaddl x9, w28, w9, x19 // UMADDL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMAX_ZI_B | umax z8.b, z8.b, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMAX_ZPmZ_B | umax z27.b, p1/m, z27.b, z13.b // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMAXv16i8 | umax v7.16b, v11.16b, v7.16b // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMAXPv8i16 | umaxp v15.8h, v8.8h, v12.8h // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv8i8v | umaxv b19, v7.8b // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UMAXVv16i8v | umaxv b12, v10.16b // UMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv4i16v | umaxv h27, v5.4h // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv8i16v | umaxv h11, v22.8h // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMAXVv4i32v | umaxv s5, v25.4s // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_B | umaxv b9, p7, z19.b // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_H | umaxv h8, p7, z26.h // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_S | umaxv s15, p2, z28.s // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMAXV_VPZ_D | umaxv d11, p4, z11.d // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMIN_ZI_S | umin z21.s, z21.s, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UMIN_ZPmZ_S | umin z31.s, p2/m, z31.s, z4.s // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMINv16i8 | umin v0.16b, v26.16b, v2.16b // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UMINPv4i32 | uminp v28.4s, v16.4s, v15.4s // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMINVv8i8v | uminv b23, v21.8b // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV13[2] | UMINVv16i8v | uminv b3, v10.16b // UMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMINVv4i16v | uminv h6, v22.4h // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UMINVv8i16v | uminv h23, v3.8h // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | UMINVv4i32v | uminv s29, v19.4s // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 14 | 14 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_B | uminv b2, p5, z8.b // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 1 14 14 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 12 | 12 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_H | uminv h28, p0, z0.h // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 1 12 12 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 10 | 10 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_S | uminv s10, p1, z29.s // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 1 10 10 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 8 | 8 | 0.50 | V1UnitV[2], V1UnitV1[2], V1UnitV01[2], V1UnitV13[2] | UMINV_VPZ_D | uminv d24, p5, z29.d // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 1 8 8 0.5 V1UnitV1[2]
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i16_indexed | umlal v22.4s, v14.4h, v0.h[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv2i32_indexed | umlal v28.2d, v31.2s, v0.s[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv8i16_indexed | umlal2 v31.4s, v7.8h, v15.h[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i32_indexed | umlal2 v10.2d, v4.4s, v3.s[2] // UMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i16_v4i32 | umlal v29.4s, v20.4h, v30.4h // UMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLALv4i32_v2i64 | umlal2 v10.2d, v28.4s, v19.4s // UMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv4i16_indexed | umlsl v21.4s, v12.4h, v7.h[5] // UMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv2i32_indexed | umlsl v20.2d, v20.2s, v2.s[0] // UMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv8i16_indexed | umlsl2 v27.4s, v28.8h, v6.h[4] // UMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv4i32_indexed | umlsl2 v30.2d, v23.4s, v1.s[2] // UMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv2i32_v2i64 | umlsl v11.2d, v23.2s, v1.2s // UMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV02 | UMLSLv16i8_v8i16 | umlsl2 v11.8h, v20.16b, v2.16b // UMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | UMMLA | ummla v14.4s, v17.16b, v25.16b // UMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UMAX_ZI_B | umax z8.b, z8.b, #12 // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UMAX_ZPmZ_B | umax z27.b, p1/m, z27.b, z13.b // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UMAXv16i8 | umax v7.16b, v11.16b, v7.16b // UMAX <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UMAXPv8i16 | umaxp v15.8h, v8.8h, v12.8h // UMAXP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMAXVv8i8v | umaxv b19, v7.8b // UMAXV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | UMAXVv16i8v | umaxv b12, v10.16b // UMAXV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMAXVv4i16v | umaxv h27, v5.4h // UMAXV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMAXVv8i16v | umaxv h11, v22.8h // UMAXV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMAXVv4i32v | umaxv s5, v25.4s // UMAXV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 14 | 14 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMAXV_VPZ_B | umaxv b9, p7, z19.b // UMAXV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMAXV_VPZ_H | umaxv h8, p7, z26.h // UMAXV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMAXV_VPZ_S | umaxv s15, p2, z28.s // UMAXV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMAXV_VPZ_D | umaxv d11, p4, z11.d // UMAXV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UMIN_ZI_S | umin z21.s, z21.s, #139 // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UMIN_ZPmZ_S | umin z31.s, p2/m, z31.s, z4.s // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UMINv16i8 | umin v0.16b, v26.16b, v2.16b // UMIN <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UMINPv4i32 | uminp v28.4s, v16.4s, v15.4s // UMINP <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD max/min, basic and pair-wise \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMINVv8i8v | uminv b23, v21.8b // UMINV B<d>, <Vn>.8B \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | UMINVv16i8v | uminv b3, v10.16b // UMINV B<d>, <Vn>.16B \\ ASIMD max/min, reduce, 16B \\ 1 4 4 1.0 V1UnitV13[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMINVv4i16v | uminv h6, v22.4h // UMINV H<d>, <Vn>.4H \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMINVv8i16v | uminv h23, v3.8h // UMINV H<d>, <Vn>.8H \\ ASIMD max/min, reduce, 8B/8H \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UMINVv4i32v | uminv s29, v19.4s // UMINV S<d>, <Vn>.4S \\ ASIMD max/min, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 14 | 14 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMINV_VPZ_B | uminv b2, p5, z8.b // UMINV B<d>, <Pg>, <Zn>.B \\ Reduction, arithmetic, B form \\ 2 14 14 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 12 | 12 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMINV_VPZ_H | uminv h28, p0, z0.h // UMINV H<d>, <Pg>, <Zn>.H \\ Reduction, arithmetic, H form \\ 2 12 12 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 10 | 10 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMINV_VPZ_S | uminv s10, p1, z29.s // UMINV S<d>, <Pg>, <Zn>.S \\ Reduction, arithmetic, S form \\ 2 10 10 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 2 | 8 | 8 | 0.50 | V1UnitSVE1[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV13[4] | UMINV_VPZ_D | uminv d24, p5, z29.d // UMINV D<d>, <Pg>, <Zn>.D \\ Reduction, arithmetic, D form \\ 2 8 8 0.5 V1UnitSVE1[4],V1UnitSVE1[4]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLALv4i16_indexed | umlal v22.4s, v14.4h, v0.h[6] // UMLAL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLALv2i32_indexed | umlal v28.2d, v31.2s, v0.s[1] // UMLAL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLALv8i16_indexed | umlal2 v31.4s, v7.8h, v15.h[5] // UMLAL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLALv4i32_indexed | umlal2 v10.2d, v4.4s, v3.s[2] // UMLAL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLALv4i16_v4i32 | umlal v29.4s, v20.4h, v30.4h // UMLAL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLALv4i32_v2i64 | umlal2 v10.2d, v28.4s, v19.4s // UMLAL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLSLv4i16_indexed | umlsl v21.4s, v12.4h, v7.h[5] // UMLSL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLSLv2i32_indexed | umlsl v20.2d, v20.2s, v2.s[0] // UMLSL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLSLv8i16_indexed | umlsl2 v27.4s, v28.8h, v6.h[4] // UMLSL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLSLv4i32_indexed | umlsl2 v30.2d, v23.4s, v1.s[2] // UMLSL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLSLv2i32_v2i64 | umlsl v11.2d, v23.2s, v1.2s // UMLSL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMLSLv16i8_v8i16 | umlsl2 v11.8h, v20.16b, v2.16b // UMLSL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply accumulate long \\ 1 4 1 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | UMMLA | ummla v14.4s, v17.16b, v25.16b // UMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | UMSUBLrrr | umnegl x23, w5, w23 // UMNEGL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi8_idx0 | umov w6, v22.b[0] // UMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi8 | umov w29, v0.b[11] // UMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi16_idx0 | umov w10, v25.h[0] // UMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi16 | umov w6, v7.h[3] // UMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi32_idx0 | mov w8, v8.s[0] // UMOV <Wd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi32 | mov w20, v1.s[3] // UMOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi64_idx0 | mov x20, v11.d[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV[4] | UMOVvi64 | mov x29, v7.d[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi8_idx0 | umov w6, v22.b[0] // UMOV <Wd>, <Vn>.B[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi8 | umov w29, v0.b[11] // UMOV <Wd>, <Vn>.B[<indexb>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi16_idx0 | umov w10, v25.h[0] // UMOV <Wd>, <Vn>.H[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi16 | umov w6, v7.h[3] // UMOV <Wd>, <Vn>.H[<indexh>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi32_idx0 | mov w8, v8.s[0] // UMOV <Wd>, <Vn>.S[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi32 | mov w20, v1.s[3] // UMOV <Wd>, <Vn>.S[<indexs>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi64_idx0 | mov x20, v11.d[0] // UMOV <Xd>, <Vn>.D[0] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
+# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitSVE01[4], V1UnitV[4] | UMOVvi64 | mov x29, v7.d[1] // UMOV <Xd>, <Vn>.D[<indexd>] \\ ASIMD transfer, element to gen reg \\ 1 2 2 1.0 V1UnitV[4]
# CHECK-NEXT: 1 | 2 | 1 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UMSUBLrrr | umsubl x21, w16, w28, x6 // UMSUBL <Xd>, <Wn>, <Wm>, <Xa> \\ Multiply accumulate long \\ 1 2 1 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_B | umulh z20.b, p4/m, z20.b, z6.b // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_H | umulh z30.h, p6/m, z30.h, z15.h // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV, V1UnitV0, V1UnitV01, V1UnitV02 | UMULH_ZPmZ_S | umulh z11.s, p7/m, z11.s, z8.s // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 1 4 4 1.0 V1UnitV0
-# CHECK-NEXT: 1 | 5 | 5 | 0.50 | V1UnitV[2], V1UnitV0[2], V1UnitV01[2], V1UnitV02[2] | UMULH_ZPmZ_D | umulh z3.d, p3/m, z3.d, z2.d // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 1 5 5 0.5 V1UnitV0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UMULH_ZPmZ_B | umulh z20.b, p4/m, z20.b, z6.b // UMULH <Zdn>.B, <Pg>/M, <Zdn>.B, <Zm>.B \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UMULH_ZPmZ_H | umulh z30.h, p6/m, z30.h, z15.h // UMULH <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | UMULH_ZPmZ_S | umulh z11.s, p7/m, z11.s, z8.s // UMULH <Zdn>.S, <Pg>/M, <Zdn>.S, <Zm>.S \\ Multiply, B, H, S element size \\ 2 4 4 1.0 V1UnitSVE0[2],V1UnitSVE0[2]
+# CHECK-NEXT: 2 | 5 | 5 | 0.50 | V1UnitSVE0[4], V1UnitSVE01[4], V1UnitV[4], V1UnitV02[4] | UMULH_ZPmZ_D | umulh z3.d, p3/m, z3.d, z2.d // UMULH <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D \\ Multiply, D element size \\ 2 5 5 0.5 V1UnitSVE0[4],V1UnitSVE0[4]
# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitI, V1UnitM | UMULHrr | umulh x23, x22, x19 // UMULH <Xd>, <Xn>, <Xm> \\ Multiply high \\ 1 3 3 2.0 V1UnitM
# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitI, V1UnitM | UMADDLrrr | umull x5, w17, w23 // UMULL <Xd>, <Wn>, <Wm> \\ Multiply long \\ 1 2 2 2.0 V1UnitM
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i16_indexed | umull v27.4s, v1.4h, v8.h[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv2i32_indexed | umull v22.2d, v28.2s, v6.s[1] // UMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv8i16_indexed | umull2 v18.4s, v26.8h, v10.h[1] // UMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i32_indexed | umull2 v28.2d, v21.4s, v1.s[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv4i16_v4i32 | umull v23.4s, v26.4h, v19.4h // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | UMULLv16i8_v8i16 | umull2 v11.8h, v29.16b, v29.16b // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_B | uqadd z18.b, z18.b, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_S | uqadd z2.s, z2.s, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZI_S | uqadd z24.s, z24.s, #56 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQADD_ZZZ_H | uqadd z6.h, z28.h, z5.h // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQADDv1i32 | uqadd s0, s24, s30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQADDv2i64 | uqadd v14.2d, v22.2d, v20.2d // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMULLv4i16_indexed | umull v27.4s, v1.4h, v8.h[6] // UMULL <Vd>.4S, <Vn>.4H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMULLv2i32_indexed | umull v22.2d, v28.2s, v6.s[1] // UMULL <Vd>.2D, <Vn>.2S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMULLv8i16_indexed | umull2 v18.4s, v26.8h, v10.h[1] // UMULL2 <Vd>.4S, <Vn>.8H, <Vmh>.H[<indexh>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMULLv4i32_indexed | umull2 v28.2d, v21.4s, v1.s[0] // UMULL2 <Vd>.2D, <Vn>.4S, <Vms>.S[<indexs>] \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMULLv4i16_v4i32 | umull v23.4s, v26.4h, v19.4h // UMULL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | UMULLv16i8_v8i16 | umull2 v11.8h, v29.16b, v29.16b // UMULL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD multiply long \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQADD_ZI_B | uqadd z18.b, z18.b, #14 // UQADD <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQADD_ZI_S | uqadd z2.s, z2.s, #14 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQADD_ZI_S | uqadd z24.s, z24.s, #56 // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQADD_ZZZ_H | uqadd z6.h, z28.h, z5.h // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UQADDv1i32 | uqadd s0, s24, s30 // UQADD <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UQADDv2i64 | uqadd v14.2d, v22.2d, v20.2d // UQADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w10 // UQDECB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w8, vl3 // UQDECB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECB_WPiI | uqdecb w3, vl32 // UQDECB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -7414,30 +7417,30 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x1 // UQDECD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x12, vl8 // UQDECD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECD_XPiI | uqdecd x10, vl64, mul #10 // UQDECD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECD_ZPiI | uqdecd z0.d // UQDECD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECD_ZPiI | uqdecd z8.d, vl3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECD_ZPiI | uqdecd z27.d, vl16, mul #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECD_ZPiI | uqdecd z0.d // UQDECD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECD_ZPiI | uqdecd z8.d, vl3 // UQDECD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECD_ZPiI | uqdecd z27.d, vl16, mul #2 // UQDECD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w30 // UQDECH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w28, mul3 // UQDECH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_WPiI | uqdech w5, vl5, mul #8 // UQDECH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x2 // UQDECH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x15, vl7 // UQDECH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECH_XPiI | uqdech x17, vl256, mul #10 // UQDECH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECH_ZPiI | uqdech z5.h // UQDECH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECH_ZPiI | uqdech z16.h, vl128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECH_ZPiI | uqdech z27.h, vl128, mul #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECH_ZPiI | uqdech z5.h // UQDECH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECH_ZPiI | uqdech z16.h, vl128 // UQDECH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECH_ZPiI | uqdech z27.h, vl128, mul #15 // UQDECH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECP_WP_H | uqdecp w19, p5.h // UQDECP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECP_XP_B | uqdecp x1, p1.b // UQDECP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | UQDECP_ZP_S | uqdecp z20.s, p0.s // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE01[2], V1UnitV[2] | UQDECP_ZP_S | uqdecp z20.s, p0.s // UQDECP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w17 // UQDECW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w11, vl256 // UQDECW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_WPiI | uqdecw w13, mul4, mul #13 // UQDECW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x7 // UQDECW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x28, vl32 // UQDECW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQDECW_XPiI | uqdecw x0, vl256, mul #3 // UQDECW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECW_ZPiI | uqdecw z29.s // UQDECW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECW_ZPiI | uqdecw z22.s, vl2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQDECW_ZPiI | uqdecw z20.s, vl2, mul #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECW_ZPiI | uqdecw z29.s // UQDECW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECW_ZPiI | uqdecw z22.s, vl2 // UQDECW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQDECW_ZPiI | uqdecw z20.s, vl2, mul #10 // UQDECW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w2 // UQINCB <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w21, vl128 // UQINCB <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCB_WPiI | uqincb w0, all, mul #13 // UQINCB <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
@@ -7450,128 +7453,128 @@ test:
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x0 // UQINCD <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x29, mul4 // UQINCD <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCD_XPiI | uqincd x20, pow2, mul #3 // UQINCD <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCD_ZPiI | uqincd z29.d // UQINCD <Zdn>.D \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCD_ZPiI | uqincd z4.d, vl64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCD_ZPiI | uqincd z12.d, vl6, mul #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCD_ZPiI | uqincd z29.d // UQINCD <Zdn>.D \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCD_ZPiI | uqincd z4.d, vl64 // UQINCD <Zdn>.D, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCD_ZPiI | uqincd z12.d, vl6, mul #13 // UQINCD <Zdn>.D, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w4 // UQINCH <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w23, mul3 // UQINCH <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_WPiI | uqinch w27, vl7, mul #3 // UQINCH <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x8 // UQINCH <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x13, mul3 // UQINCH <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCH_XPiI | uqinch x5, mul4, mul #9 // UQINCH <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCH_ZPiI | uqinch z21.h // UQINCH <Zdn>.H \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCH_ZPiI | uqinch z1.h, vl8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCH_ZPiI | uqinch z7.h, vl7, mul #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCH_ZPiI | uqinch z21.h // UQINCH <Zdn>.H \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCH_ZPiI | uqinch z1.h, vl8 // UQINCH <Zdn>.H, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCH_ZPiI | uqinch z7.h, vl7, mul #12 // UQINCH <Zdn>.H, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCP_WP_D | uqincp w4, p5.d // UQINCP <Wdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCP_XP_D | uqincp x13, p5.d // UQINCP <Xdn>, <Pm>.<T> \\ Predicate counting scalar, active predicate \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 2 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitV[2], V1UnitV01[2] | UQINCP_ZP_S | uqincp z1.s, p0.s // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 2 7 7 0.5 V1UnitM0[2],V1UnitV01[2]
+# CHECK-NEXT: 3 | 7 | 7 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2], V1UnitSVE01[2], V1UnitV[2] | UQINCP_ZP_S | uqincp z1.s, p0.s // UQINCP <Zdn>.<T>, <Pm> \\ Predicate counting vector, active predicate \\ 3 7 7 0.5 V1UnitM0[2],V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w13 // UQINCW <Wdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w26, vl8 // UQINCW <Wdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_WPiI | uqincw w3, vl16, mul #13 // UQINCW <Wdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x26 // UQINCW <Xdn> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x13, vl256 // UQINCW <Xdn>, <pattern> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UQINCW_XPiI | uqincw x29, vl7, mul #6 // UQINCW <Xdn>, <pattern>, MUL #<imm> \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCW_ZPiI | uqincw z26.s // UQINCW <Zdn>.S \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCW_ZPiI | uqincw z31.s, vl5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQINCW_ZPiI | uqincw z12.s, vl7, mul #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHLv1i32 | uqrshl s17, s5, s8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHLv8i8 | uqrshl v25.8b, v13.8b, v23.8b // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNb | uqrshrn b12, h9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNh | uqrshrn h1, s28, #2 // UQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNs | uqrshrn s1, d4, #12 // UQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv8i8_shift | uqrshrn v17.8b, v24.8h, #4 // UQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv4i16_shift | uqrshrn v29.4h, v25.4s, #10 // UQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv2i32_shift | uqrshrn v16.2s, v0.2d, #10 // UQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv16i8_shift | uqrshrn2 v5.16b, v28.8h, #6 // UQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv8i16_shift | uqrshrn2 v28.8h, v22.4s, #15 // UQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQRSHRNv4i32_shift | uqrshrn2 v20.4s, v13.2d, #4 // UQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLb | uqshl b16, b25, #3 // UQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLh | uqshl h22, h27, #3 // UQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLs | uqshl s9, s5, #2 // UQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLd | uqshl d25, d1, #30 // UQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv16i8_shift | uqshl v25.16b, v0.16b, #7 // UQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv4i16_shift | uqshl v1.4h, v12.4h, #15 // UQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv2i32_shift | uqshl v23.2s, v4.2s, #17 // UQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv2i64_shift | uqshl v28.2d, v23.2d, #48 // UQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv1i8 | uqshl b22, b26, b2 // UQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHLv4i16 | uqshl v8.4h, v17.4h, v13.4h // UQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNb | uqshrn b16, h27, #6 // UQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNh | uqshrn h4, s2, #15 // UQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNs | uqshrn s0, d15, #22 // UQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv8i8_shift | uqshrn v19.8b, v26.8h, #3 // UQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv4i16_shift | uqshrn v31.4h, v17.4s, #8 // UQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv2i32_shift | uqshrn v1.2s, v11.2d, #9 // UQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv16i8_shift | uqshrn2 v23.16b, v16.8h, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv8i16_shift | uqshrn2 v1.8h, v12.4s, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQSHRNv4i32_shift | uqshrn2 v30.4s, v29.2d, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_B | uqsub z26.b, z26.b, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_S | uqsub z19.s, z19.s, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZI_H | uqsub z15.h, z15.h, #26624 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UQSUB_ZZZ_D | uqsub z25.d, z13.d, z19.d // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSUBv1i32 | uqsub s16, s21, s6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UQSUBv4i32 | uqsub v19.4s, v0.4s, v5.4s // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv1i32 | uqxtn s3, d27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv2i32 | uqxtn v26.2s, v5.2d // UQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | UQXTNv16i8 | uqxtn2 v15.16b, v22.8h // UQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | URECPEv2i32 | urecpe v10.2s, v8.2s // URECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | URECPEv4i32 | urecpe v1.4s, v23.4s // URECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | URHADDv2i32 | urhadd v16.2s, v19.2s, v2.2s // URHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHLv1i64 | urshl d24, d22, d29 // URSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHLv8i8 | urshl v31.8b, v5.8b, v3.8b // URSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRd | urshr d23, d19, #62 // URSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv16i8_shift | urshr v23.16b, v14.16b, #2 // URSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv4i16_shift | urshr v16.4h, v13.4h, #7 // URSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv4i32_shift | urshr v10.4s, v10.4s, #21 // URSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitV, V1UnitV13 | URSHRv2i64_shift | urshr v2.2d, v16.2d, #30 // URSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitV, V1UnitV02 | URSQRTEv2i32 | ursqrte v15.2s, v20.2s // URSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
-# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitV[2], V1UnitV02[2] | URSQRTEv4i32 | ursqrte v31.4s, v14.4s // URSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAd | ursra d24, d24, #48 // URSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv8i8_shift | ursra v14.8b, v18.8b, #1 // URSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv4i16_shift | ursra v9.4h, v9.4h, #16 // URSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv2i32_shift | ursra v25.2s, v17.2s, #9 // URSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | URSRAv2i64_shift | ursra v17.2d, v16.2d, #61 // URSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOTlanev8i8 | usdot v0.2s, v18.8b, v10.4b[3] // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV[2] | USDOT_ZZZI | usdot z5.s, z25.b, z2.b[1] // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USDOTv8i8 | usdot v17.2s, v0.8b, v29.8b // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitV[2] | USDOT_ZZZ | usdot z8.s, z6.b, z18.b // USDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLv1i64 | ushl d7, d17, d3 // USHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLv8i8 | ushl v6.8b, v26.8b, v6.8b // USHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv8i8_shift | ushll v18.8h, v24.8b, #4 // USHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v12.4s, v10.4h, #3 // USHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv2i32_shift | ushll v16.2d, v16.2s, #31 // USHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #3 // USHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv8i16_shift | ushll2 v18.4s, v22.8h, #13 // USHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i32_shift | ushll2 v31.2d, v12.4s, #11 // USHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRd | ushr d23, d22, #58 // USHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv8i8_shift | ushr v24.8b, v0.8b, #2 // USHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv8i16_shift | ushr v21.8h, v31.8h, #11 // USHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv2i32_shift | ushr v27.2s, v24.2s, #14 // USHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHRv2i64_shift | ushr v0.2d, v27.2d, #48 // USHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitV | USMMLA | usmmla v25.4s, v10.16b, v11.16b // USMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USQADDv1i16 | usqadd h14, h13 // USQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USQADDv2i64 | usqadd v18.2d, v23.2d // USQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAd | usra d22, d24, #9 // USRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv16i8_shift | usra v16.16b, v5.16b, #5 // USRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv4i16_shift | usra v18.4h, v22.4h, #11 // USRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv2i32_shift | usra v13.2s, v12.2s, #24 // USRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitV, V1UnitV13 | USRAv2i64_shift | usra v30.2d, v30.2d, #41 // USRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBLv4i16_v4i32 | usubl v22.4s, v18.4h, v3.4h // USUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBLv16i8_v8i16 | usubl2 v12.8h, v23.16b, v15.16b // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBWv8i8_v8i16 | usubw v30.8h, v12.8h, v20.8b // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | USUBWv8i16_v4i32 | usubw2 v2.4s, v0.4s, v30.8h // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UUNPKHI_ZZ_D | uunpkhi z26.d, z26.s // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV01 | UUNPKLO_ZZ_S | uunpklo z10.s, z11.h // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 1 2 2 2.0 V1UnitV01
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCW_ZPiI | uqincw z26.s // UQINCW <Zdn>.S \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCW_ZPiI | uqincw z31.s, vl5 // UQINCW <Zdn>.S, <pattern> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQINCW_ZPiI | uqincw z12.s, vl7, mul #4 // UQINCW <Zdn>.S, <pattern>, MUL #<imm> \\ Predicate counting vector \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHLv1i32 | uqrshl s17, s5, s8 // UQRSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHLv8i8 | uqrshl v25.8b, v13.8b, v23.8b // UQRSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNb | uqrshrn b12, h9, #4 // UQRSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNh | uqrshrn h1, s28, #2 // UQRSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNs | uqrshrn s1, d4, #12 // UQRSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNv8i8_shift | uqrshrn v17.8b, v24.8h, #4 // UQRSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNv4i16_shift | uqrshrn v29.4h, v25.4s, #10 // UQRSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNv2i32_shift | uqrshrn v16.2s, v0.2d, #10 // UQRSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNv16i8_shift | uqrshrn2 v5.16b, v28.8h, #6 // UQRSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNv8i16_shift | uqrshrn2 v28.8h, v22.4s, #15 // UQRSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQRSHRNv4i32_shift | uqrshrn2 v20.4s, v13.2d, #4 // UQRSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLb | uqshl b16, b25, #3 // UQSHL B<d>, B<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLh | uqshl h22, h27, #3 // UQSHL H<d>, H<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLs | uqshl s9, s5, #2 // UQSHL S<d>, S<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLd | uqshl d25, d1, #30 // UQSHL D<d>, D<n>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLv16i8_shift | uqshl v25.16b, v0.16b, #7 // UQSHL <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLv4i16_shift | uqshl v1.4h, v12.4h, #15 // UQSHL <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLv2i32_shift | uqshl v23.2s, v4.2s, #17 // UQSHL <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLv2i64_shift | uqshl v28.2d, v23.2d, #48 // UQSHL <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLv1i8 | uqshl b22, b26, b2 // UQSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHLv4i16 | uqshl v8.4h, v17.4h, v13.4h // UQSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNb | uqshrn b16, h27, #6 // UQSHRN B<d>, H<n>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNh | uqshrn h4, s2, #15 // UQSHRN H<d>, S<n>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNs | uqshrn s0, d15, #22 // UQSHRN S<d>, D<n>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNv8i8_shift | uqshrn v19.8b, v26.8h, #3 // UQSHRN <Vd>.8B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNv4i16_shift | uqshrn v31.4h, v17.4s, #8 // UQSHRN <Vd>.4H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNv2i32_shift | uqshrn v1.2s, v11.2d, #9 // UQSHRN <Vd>.2S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNv16i8_shift | uqshrn2 v23.16b, v16.8h, #1 // UQSHRN2 <Vd>.16B, <Vn>.8H, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNv8i16_shift | uqshrn2 v1.8h, v12.4s, #2 // UQSHRN2 <Vd>.8H, <Vn>.4S, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQSHRNv4i32_shift | uqshrn2 v30.4s, v29.2d, #32 // UQSHRN2 <Vd>.4S, <Vn>.2D, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQSUB_ZI_B | uqsub z26.b, z26.b, #174 // UQSUB <Zdn>.B, <Zdn>.B, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQSUB_ZI_S | uqsub z19.s, z19.s, #228 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQSUB_ZI_H | uqsub z15.h, z15.h, #26624 // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>, <shift> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UQSUB_ZZZ_D | uqsub z25.d, z13.d, z19.d // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> \\ Arithmetic, basic \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UQSUBv1i32 | uqsub s16, s21, s6 // UQSUB <V><d>, <V><n>, <V><m> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UQSUBv4i32 | uqsub v19.4s, v0.4s, v5.4s // UQSUB <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQXTNv1i32 | uqxtn s3, d27 // UQXTN <Vb><d>, <Va><n> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQXTNv2i32 | uqxtn v26.2s, v5.2d // UQXTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | UQXTNv16i8 | uqxtn2 v15.16b, v22.8h // UQXTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow, saturating \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | URECPEv2i32 | urecpe v10.2s, v8.2s // URECPE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | URECPEv4i32 | urecpe v1.4s, v23.4s // URECPE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | URHADDv2i32 | urhadd v16.2s, v19.2s, v2.2s // URHADD <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHLv1i64 | urshl d24, d22, d29 // URSHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHLv8i8 | urshl v31.8b, v5.8b, v3.8b // URSHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHRd | urshr d23, d19, #62 // URSHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHRv16i8_shift | urshr v23.16b, v14.16b, #2 // URSHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHRv4i16_shift | urshr v16.4h, v13.4h, #7 // URSHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHRv4i32_shift | urshr v10.4s, v10.4s, #21 // URSHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 4 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSHRv2i64_shift | urshr v2.2d, v16.2d, #30 // URSHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, complex \\ 1 4 4 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 3 | 2.00 | V1UnitSVE0, V1UnitSVE01, V1UnitV, V1UnitV02 | URSQRTEv2i32 | ursqrte v15.2s, v20.2s // URSQRTE <Vd>.2S, <Vn>.2S \\ ASIMD reciprocal and square root estimate, D-form U32 \\ 1 3 3 2.0 V1UnitV02
+# CHECK-NEXT: 1 | 4 | 4 | 1.00 | V1UnitSVE0[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV02[2] | URSQRTEv4i32 | ursqrte v31.4s, v14.4s // URSQRTE <Vd>.4S, <Vn>.4S \\ ASIMD reciprocal and square root estimate, Q-form U32 \\ 1 4 4 1.0 V1UnitV02[2]
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSRAd | ursra d24, d24, #48 // URSRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSRAv8i8_shift | ursra v14.8b, v18.8b, #1 // URSRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSRAv4i16_shift | ursra v9.4h, v9.4h, #16 // URSRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSRAv2i32_shift | ursra v25.2s, v17.2s, #9 // URSRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | URSRAv2i64_shift | ursra v17.2d, v16.2d, #61 // URSRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | USDOTlanev8i8 | usdot v0.2s, v18.8b, v10.4b[3] // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.4B[<index>] \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | USDOT_ZZZI | usdot z5.s, z25.b, z2.b[1] // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>] \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | USDOTv8i8 | usdot v17.2s, v0.8b, v29.8b // USDOT <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD dot product using signed and unsigned integers \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 3 | 1 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | USDOT_ZZZ | usdot z8.s, z6.b, z18.b // USDOT <Zda>.S, <Zn>.B, <Zm>.B \\ Dot product, 8 bit, using signed and unsigned integers \\ 1 3 1 2.0 V1UnitV[2]
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLv1i64 | ushl d7, d17, d3 // USHL <V><d>, <V><n>, <V><m> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLv8i8 | ushl v6.8b, v26.8b, v6.8b // USHL <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD shift by register, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv8i8_shift | ushll v18.8h, v24.8b, #4 // USHLL <Vd>.8H, <Vn>.8B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v12.4s, v10.4h, #3 // USHLL <Vd>.4S, <Vn>.4H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv2i32_shift | ushll v16.2d, v16.2s, #31 // USHLL <Vd>.2D, <Vn>.2S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #3 // USHLL2 <Vd>.8H, <Vn>.16B, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv8i16_shift | ushll2 v18.4s, v22.8h, #13 // USHLL2 <Vd>.4S, <Vn>.8H, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv4i32_shift | ushll2 v31.2d, v12.4s, #11 // USHLL2 <Vd>.2D, <Vn>.4S, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHRd | ushr d23, d22, #58 // USHR <V><d>, <V><n>, #<shift> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHRv8i8_shift | ushr v24.8b, v0.8b, #2 // USHR <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHRv8i16_shift | ushr v21.8h, v31.8h, #11 // USHR <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHRv2i32_shift | ushr v27.2s, v24.2s, #14 // USHR <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHRv2i64_shift | ushr v0.2d, v27.2d, #48 // USHR <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 3 | 1 | 4.00 | V1UnitSVE01, V1UnitV | USMMLA | usmmla v25.4s, v10.16b, v11.16b // USMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B \\ ASIMD matrix multiply-accumulate \\ 1 3 1 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | USQADDv1i16 | usqadd h14, h13 // USQADD <V><d>, <V><n> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | USQADDv2i64 | usqadd v18.2d, v23.2d // USQADD <Vd>.<T>, <Vn>.<T> \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USRAd | usra d22, d24, #9 // USRA <V><d>, <V><n>, #<shift> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USRAv16i8_shift | usra v16.16b, v5.16b, #5 // USRA <Vd>.<Tb>, <Vn>.<Tb>, #<shiftb> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USRAv4i16_shift | usra v18.4h, v22.4h, #11 // USRA <Vd>.<Th>, <Vn>.<Th>, #<shifth> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USRAv2i32_shift | usra v13.2s, v12.2s, #24 // USRA <Vd>.<Ts>, <Vn>.<Ts>, #<shifts> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 4 | 1 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USRAv2i64_shift | usra v30.2d, v30.2d, #41 // USRA <Vd>.<Td>, <Vn>.<Td>, #<shiftd> \\ ASIMD shift accumulate \\ 1 4 1 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | USUBLv4i16_v4i32 | usubl v22.4s, v18.4h, v3.4h // USUBL <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | USUBLv16i8_v8i16 | usubl2 v12.8h, v23.16b, v15.16b // USUBL2 <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | USUBWv8i8_v8i16 | usubw v30.8h, v12.8h, v20.8b // USUBW <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | USUBWv8i16_v4i32 | usubw2 v2.4s, v0.4s, v30.8h // USUBW2 <Vd>.<Ta>, <Vn>.<Ta>, <Vm>.<Tb> \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UUNPKHI_ZZ_D | uunpkhi z26.d, z26.s // UUNPKHI <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
+# CHECK-NEXT: 2 | 2 | 2 | 2.00 | V1UnitSVE01[2], V1UnitV[2] | UUNPKLO_ZZ_S | uunpklo z10.s, z11.h // UUNPKLO <Zd>.<T>, <Zn>.<Tb> \\ Unpack and extend \\ 2 2 2 2.0 V1UnitSVE01[2],V1UnitSVE01[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | uxtb w2, w23 // UXTB <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTB_ZPmZ_D | uxtb z1.d, p2/m, z11.d // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTH_ZPmZ_S | uxth z6.s, p3/m, z18.s // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
-# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitV, V1UnitV1, V1UnitV01, V1UnitV13 | UXTW_ZPmZ_D | uxtw z23.d, p4/m, z3.d // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 1 2 2 1.0 V1UnitV1
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | UXTB_ZPmZ_D | uxtb z1.d, p2/m, z11.d // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | UXTH_ZPmZ_S | uxth z6.s, p3/m, z18.s // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
+# CHECK-NEXT: 2 | 2 | 2 | 1.00 | V1UnitSVE1[2], V1UnitSVE01[2], V1UnitV[2], V1UnitV13[2] | UXTW_ZPmZ_D | uxtw z23.d, p4/m, z3.d // UXTW <Zd>.D, <Pg>/M, <Zn>.D \\ Extend, sign or zero \\ 2 2 2 1.0 V1UnitSVE1[2],V1UnitSVE1[2]
# CHECK-NEXT: 1 | 1 | 1 | 4.00 | V1UnitI | UBFMWri | uxth w7, w14 // UXTH <Wd>, <Wn> \\ Sign/zero extend, normal \\ 1 1 1 4.0 V1UnitI
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v1.4s, v22.4h, #0 // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #0 // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UZP1v2i32 | uzp1 v9.2s, v29.2s, v20.2s // UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv4i16_shift | ushll v1.4s, v22.4h, #0 // UXTL <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 2.00 | V1UnitSVE1, V1UnitSVE01, V1UnitV, V1UnitV13 | USHLLv16i8_shift | ushll2 v14.8h, v3.16b, #0 // UXTL2 <Vd>.<Ta>, <Vn>.<Tb> \\ ASIMD shift by immed, basic \\ 1 2 2 2.0 V1UnitV13
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UZP1v2i32 | uzp1 v9.2s, v29.2s, v20.2s // UZP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UZP1_PPP_D | uzp1 p5.d, p3.d, p5.d // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | UZP2_PPP_S | uzp2 p6.s, p0.s, p6.s // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | UZP2v4i32 | uzp2 v18.4s, v12.4s, v31.4s // UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | UZP2v4i32 | uzp2 v18.4s, v12.4s, v31.4s // UZP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | wfe // WFE \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | wfi // WFI \\ No description \\ No scheduling info
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELE_PXX_H | whilele p6.h, x28, x30 // WHILELE <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
@@ -7579,10 +7582,10 @@ test:
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELS_PWW_B | whilels p4.b, w4, w20 // WHILELS <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 3 | 3 | 0.50 | V1UnitI[2], V1UnitM[2], V1UnitM0[2] | WHILELT_PXX_S | whilelt p7.s, x20, x6 // WHILELT <Pd>.<T>, <R><n>, <R><m> \\ Loop control, based on GPR \\ 1 3 3 0.5 V1UnitM0[2]
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | WRFFR | wrffr p7.b // WRFFR <Pn>.B \\ Write to first fault register \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | XTNv8i8 | xtn v20.8b, v17.8h // XTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | XTNv16i8 | xtn2 v31.16b, v26.8h // XTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | XTNv8i8 | xtn v20.8b, v17.8h // XTN <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | XTNv16i8 | xtn2 v31.16b, v26.8h // XTN2 <Vd>.<Tb>, <Vn>.<Ta> \\ ASIMD extract narrow \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 1 | 1 | 15.00 | | HINT | yield // YIELD \\ No description \\ No scheduling info
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ZIP1v2i64 | zip1 v21.2d, v4.2d, v11.2d // ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ZIP1v2i64 | zip1 v21.2d, v4.2d, v11.2d // ZIP1 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ZIP1_PPP_D | zip1 p0.d, p1.d, p4.d // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
# CHECK-NEXT: 1 | 2 | 2 | 1.00 | V1UnitI, V1UnitM, V1UnitM0 | ZIP2_PPP_S | zip2 p3.s, p5.s, p4.s // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> \\ Predicate zip/unzip \\ 1 2 2 1.0 V1UnitM0
-# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitV | ZIP2v4i32 | zip2 v2.4s, v20.4s, v5.4s // ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
+# CHECK-NEXT: 1 | 2 | 2 | 4.00 | V1UnitSVE01, V1UnitV | ZIP2v4i32 | zip2 v2.4s, v20.4s, v5.4s // ZIP2 <Vd>.<T>, <Vn>.<T>, <Vm>.<T> \\ ASIMD unzip/zip \\ 1 2 2 4.0 V1UnitV
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
index bc336ab90e8b462..8f7567f3c85d7e3 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
@@ -2415,58 +2415,58 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 2 0.50 abs z0.b, p0/m, z0.b
-# CHECK-NEXT: 1 2 0.50 abs z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 0.50 abs z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 2 0.50 abs z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 2 0.50 abs z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 abs z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 abs z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 abs z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 0.50 add z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 add z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 add z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 add z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 add z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 add z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 add z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 add z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 add z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 add z0.s, z1.s, z2.s
-# CHECK-NEXT: 1 2 0.50 add z21.b, p5/m, z21.b, z10.b
-# CHECK-NEXT: 1 2 0.50 add z21.b, z10.b, z21.b
-# CHECK-NEXT: 1 2 0.50 add z21.d, p5/m, z21.d, z10.d
-# CHECK-NEXT: 1 2 0.50 add z21.d, z10.d, z21.d
-# CHECK-NEXT: 1 2 0.50 add z21.h, p5/m, z21.h, z10.h
-# CHECK-NEXT: 1 2 0.50 add z21.h, z10.h, z21.h
-# CHECK-NEXT: 1 2 0.50 add z21.s, p5/m, z21.s, z10.s
-# CHECK-NEXT: 1 2 0.50 add z21.s, z10.s, z21.s
-# CHECK-NEXT: 1 2 0.50 add z23.b, p3/m, z23.b, z13.b
-# CHECK-NEXT: 1 2 0.50 add z23.b, z13.b, z8.b
-# CHECK-NEXT: 1 2 0.50 add z23.d, p3/m, z23.d, z13.d
-# CHECK-NEXT: 1 2 0.50 add z23.d, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 add z23.h, p3/m, z23.h, z13.h
-# CHECK-NEXT: 1 2 0.50 add z23.h, z13.h, z8.h
-# CHECK-NEXT: 1 2 0.50 add z23.s, p3/m, z23.s, z13.s
-# CHECK-NEXT: 1 2 0.50 add z23.s, z13.s, z8.s
-# CHECK-NEXT: 1 2 0.50 add z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 add z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 add z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 add z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 add z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 add z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 add z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 add z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 add z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 add z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 add z31.s, z31.s, #65280
-# CHECK-NEXT: 1 2 0.50 add z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: 2 2 0.50 abs z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 0.50 abs z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 2 0.50 abs z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 2 0.50 abs z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 abs z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 abs z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 abs z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 add z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 add z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 add z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 add z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 add z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 add z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 add z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 add z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 add z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 add z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 add z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 add z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 add z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 add z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 add z0.s, z1.s, z2.s
+# CHECK-NEXT: 2 2 0.50 add z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: 2 2 0.50 add z21.b, z10.b, z21.b
+# CHECK-NEXT: 2 2 0.50 add z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: 2 2 0.50 add z21.d, z10.d, z21.d
+# CHECK-NEXT: 2 2 0.50 add z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: 2 2 0.50 add z21.h, z10.h, z21.h
+# CHECK-NEXT: 2 2 0.50 add z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: 2 2 0.50 add z21.s, z10.s, z21.s
+# CHECK-NEXT: 2 2 0.50 add z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: 2 2 0.50 add z23.b, z13.b, z8.b
+# CHECK-NEXT: 2 2 0.50 add z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: 2 2 0.50 add z23.d, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 add z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: 2 2 0.50 add z23.h, z13.h, z8.h
+# CHECK-NEXT: 2 2 0.50 add z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: 2 2 0.50 add z23.s, z13.s, z8.s
+# CHECK-NEXT: 2 2 0.50 add z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 add z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 add z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 add z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 add z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 add z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 add z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 add z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 add z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 add z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 add z31.s, z31.s, #65280
+# CHECK-NEXT: 2 2 0.50 add z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 addpl sp, sp, #31
# CHECK-NEXT: 1 2 1.00 addpl x0, x0, #-32
# CHECK-NEXT: 1 2 1.00 addpl x21, x21, #0
@@ -2475,104 +2475,104 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 addvl x0, x0, #-32
# CHECK-NEXT: 1 2 1.00 addvl x21, x21, #0
# CHECK-NEXT: 1 2 1.00 addvl x23, x8, #-1
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, lsl #1]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, lsl #2]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, lsl #3]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #1]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #2]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #3]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #1]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #2]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #3]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw]
-# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d]
-# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s, lsl #1]
-# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s, lsl #2]
-# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s, lsl #3]
-# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, lsl #1]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, lsl #2]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, lsl #3]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #1]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #2]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #3]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, sxtw]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #1]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #2]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #3]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d, uxtw]
+# CHECK-NEXT: 2 2 0.50 adr z0.d, [z0.d, z0.d]
+# CHECK-NEXT: 2 2 0.50 adr z0.s, [z0.s, z0.s, lsl #1]
+# CHECK-NEXT: 2 2 0.50 adr z0.s, [z0.s, z0.s, lsl #2]
+# CHECK-NEXT: 2 2 0.50 adr z0.s, [z0.s, z0.s, lsl #3]
+# CHECK-NEXT: 2 2 0.50 adr z0.s, [z0.s, z0.s]
# CHECK-NEXT: 1 1 1.00 and p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 1 2 0.50 and z0.d, z0.d, #0x6
-# CHECK-NEXT: 1 2 0.50 and z0.d, z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: 1 2 0.50 and z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 and z0.s, z0.s, #0x6
-# CHECK-NEXT: 1 2 0.50 and z0.s, z0.s, #0xfffffff9
-# CHECK-NEXT: 1 2 0.50 and z23.d, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 and z23.h, z23.h, #0x6
-# CHECK-NEXT: 1 2 0.50 and z23.h, z23.h, #0xfff9
-# CHECK-NEXT: 1 2 0.50 and z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 and z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 and z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 and z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0x6
-# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0xf9
+# CHECK-NEXT: 2 2 0.50 and z0.d, z0.d, #0x6
+# CHECK-NEXT: 2 2 0.50 and z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 2 2 0.50 and z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 and z0.s, z0.s, #0x6
+# CHECK-NEXT: 2 2 0.50 and z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 2 2 0.50 and z23.d, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 and z23.h, z23.h, #0x6
+# CHECK-NEXT: 2 2 0.50 and z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 2 2 0.50 and z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 and z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 and z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 and z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 and z5.b, z5.b, #0x6
+# CHECK-NEXT: 2 2 0.50 and z5.b, z5.b, #0xf9
# CHECK-NEXT: 1 2 2.00 ands p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 1 12 2.00 andv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 andv d0, p7, z31.d
-# CHECK-NEXT: 1 12 2.00 andv h0, p7, z31.h
-# CHECK-NEXT: 1 12 2.00 andv s0, p7, z31.s
-# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, z1.d
-# CHECK-NEXT: 1 2 1.00 asr z0.b, z0.b, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.b, z1.b, z2.d
-# CHECK-NEXT: 1 2 1.00 asr z0.d, p0/m, z0.d, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 asr z0.d, z0.d, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.h, p0/m, z0.h, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 asr z0.h, p0/m, z0.h, z1.d
-# CHECK-NEXT: 1 2 1.00 asr z0.h, z0.h, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.h, z1.h, z2.d
-# CHECK-NEXT: 1 2 1.00 asr z0.s, p0/m, z0.s, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 1.00 asr z0.s, p0/m, z0.s, z1.d
-# CHECK-NEXT: 1 2 1.00 asr z0.s, z0.s, #1
-# CHECK-NEXT: 1 2 1.00 asr z0.s, z1.s, z2.d
-# CHECK-NEXT: 1 2 1.00 asr z31.b, p0/m, z31.b, #8
-# CHECK-NEXT: 1 2 1.00 asr z31.b, z31.b, #8
-# CHECK-NEXT: 1 2 1.00 asr z31.d, p0/m, z31.d, #64
-# CHECK-NEXT: 1 2 1.00 asr z31.d, z31.d, #64
-# CHECK-NEXT: 1 2 1.00 asr z31.h, p0/m, z31.h, #16
-# CHECK-NEXT: 1 2 1.00 asr z31.h, z31.h, #16
-# CHECK-NEXT: 1 2 1.00 asr z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: 1 2 1.00 asr z31.s, z31.s, #32
-# CHECK-NEXT: 1 4 1.00 asrd z0.b, p0/m, z0.b, #1
-# CHECK-NEXT: 1 4 1.00 asrd z0.d, p0/m, z0.d, #1
-# CHECK-NEXT: 1 4 1.00 asrd z0.h, p0/m, z0.h, #1
-# CHECK-NEXT: 1 4 1.00 asrd z0.s, p0/m, z0.s, #1
-# CHECK-NEXT: 1 4 1.00 asrd z31.b, p0/m, z31.b, #8
-# CHECK-NEXT: 1 4 1.00 asrd z31.d, p0/m, z31.d, #64
-# CHECK-NEXT: 1 4 1.00 asrd z31.h, p0/m, z31.h, #16
-# CHECK-NEXT: 1 4 1.00 asrd z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: 1 2 1.00 asrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 asrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 asrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 asrr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 4 1.00 bfcvt z0.h, p0/m, z1.s
-# CHECK-NEXT: 1 4 1.00 bfcvtnt z0.h, p0/m, z1.s
-# CHECK-NEXT: 1 4 0.50 bfdot z0.s, z1.h, z2.h
-# CHECK-NEXT: 1 4 0.50 bfdot z0.s, z1.h, z2.h[0]
-# CHECK-NEXT: 1 4 0.50 bfdot z0.s, z1.h, z2.h[3]
-# CHECK-NEXT: 1 5 0.50 bfmlalb z0.s, z1.h, z2.h
-# CHECK-NEXT: 1 5 0.50 bfmlalb z0.s, z1.h, z2.h[0]
-# CHECK-NEXT: 1 5 0.50 bfmlalb z0.s, z1.h, z2.h[7]
-# CHECK-NEXT: 1 5 0.50 bfmlalb z10.s, z21.h, z14.h
-# CHECK-NEXT: 1 5 0.50 bfmlalb z21.s, z14.h, z3.h[2]
-# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z2.h
-# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z2.h[0]
-# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z2.h[7]
-# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z7.h[7]
-# CHECK-NEXT: 1 5 0.50 bfmlalt z14.s, z10.h, z21.h
-# CHECK-NEXT: 1 5 0.50 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 2 12 2.00 andv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 andv d0, p7, z31.d
+# CHECK-NEXT: 2 12 2.00 andv h0, p7, z31.h
+# CHECK-NEXT: 2 12 2.00 andv s0, p7, z31.s
+# CHECK-NEXT: 2 2 1.00 asr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 1.00 asr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 2 2 1.00 asr z0.b, z0.b, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.b, z1.b, z2.d
+# CHECK-NEXT: 2 2 1.00 asr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 asr z0.d, z0.d, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 asr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 2 2 1.00 asr z0.h, z0.h, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.h, z1.h, z2.d
+# CHECK-NEXT: 2 2 1.00 asr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 1.00 asr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 2 2 1.00 asr z0.s, z0.s, #1
+# CHECK-NEXT: 2 2 1.00 asr z0.s, z1.s, z2.d
+# CHECK-NEXT: 2 2 1.00 asr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 2 2 1.00 asr z31.b, z31.b, #8
+# CHECK-NEXT: 2 2 1.00 asr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 2 2 1.00 asr z31.d, z31.d, #64
+# CHECK-NEXT: 2 2 1.00 asr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 2 2 1.00 asr z31.h, z31.h, #16
+# CHECK-NEXT: 2 2 1.00 asr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 2 2 1.00 asr z31.s, z31.s, #32
+# CHECK-NEXT: 2 4 1.00 asrd z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 2 4 1.00 asrd z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 2 4 1.00 asrd z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 2 4 1.00 asrd z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 2 4 1.00 asrd z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 2 4 1.00 asrd z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 2 4 1.00 asrd z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 2 4 1.00 asrd z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 2 2 1.00 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 1.00 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 4 1.00 bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: 2 4 1.00 bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: 2 4 0.50 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 2 4 0.50 bfdot z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 2 4 0.50 bfdot z0.s, z1.h, z2.h[3]
+# CHECK-NEXT: 2 5 0.50 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 2 5 0.50 bfmlalb z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 2 5 0.50 bfmlalb z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: 2 5 0.50 bfmlalb z10.s, z21.h, z14.h
+# CHECK-NEXT: 2 5 0.50 bfmlalb z21.s, z14.h, z3.h[2]
+# CHECK-NEXT: 2 5 0.50 bfmlalt z0.s, z1.h, z2.h
+# CHECK-NEXT: 2 5 0.50 bfmlalt z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 2 5 0.50 bfmlalt z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: 2 5 0.50 bfmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 2 5 0.50 bfmlalt z14.s, z10.h, z21.h
+# CHECK-NEXT: 2 5 0.50 bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: 1 1 1.00 bic p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 bic p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 2 0.50 bic z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 bic z23.d, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 bic z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 bic z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 bic z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 bic z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 bic z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 bic z23.d, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 bic z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 bic z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 bic z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 bic z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: 1 2 2.00 bics p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 2 2.00 bics p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 brka p0.b, p15/m, p15.b
@@ -2593,196 +2593,196 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 brkpb p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 3 2.00 brkpbs p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: 1 3 2.00 brkpbs p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 3 1.00 clasta b0, p7, b0, z31.b
-# CHECK-NEXT: 1 3 1.00 clasta d0, p7, d0, z31.d
-# CHECK-NEXT: 1 3 1.00 clasta h0, p7, h0, z31.h
-# CHECK-NEXT: 1 3 1.00 clasta s0, p7, s0, z31.s
-# CHECK-NEXT: 2 9 1.00 clasta w0, p7, w0, z31.b
-# CHECK-NEXT: 2 9 1.00 clasta w0, p7, w0, z31.h
-# CHECK-NEXT: 2 9 1.00 clasta w0, p7, w0, z31.s
-# CHECK-NEXT: 2 9 1.00 clasta x0, p7, x0, z31.d
-# CHECK-NEXT: 1 3 1.00 clasta z0.b, p7, z0.b, z31.b
-# CHECK-NEXT: 1 3 1.00 clasta z0.d, p7, z0.d, z31.d
-# CHECK-NEXT: 1 3 1.00 clasta z0.h, p7, z0.h, z31.h
-# CHECK-NEXT: 1 3 1.00 clasta z0.s, p7, z0.s, z31.s
-# CHECK-NEXT: 1 3 1.00 clastb b0, p7, b0, z31.b
-# CHECK-NEXT: 1 3 1.00 clastb d0, p7, d0, z31.d
-# CHECK-NEXT: 1 3 1.00 clastb h0, p7, h0, z31.h
-# CHECK-NEXT: 1 3 1.00 clastb s0, p7, s0, z31.s
-# CHECK-NEXT: 2 9 1.00 clastb w0, p7, w0, z31.b
-# CHECK-NEXT: 2 9 1.00 clastb w0, p7, w0, z31.h
-# CHECK-NEXT: 2 9 1.00 clastb w0, p7, w0, z31.s
-# CHECK-NEXT: 2 9 1.00 clastb x0, p7, x0, z31.d
-# CHECK-NEXT: 1 3 1.00 clastb z0.b, p7, z0.b, z31.b
-# CHECK-NEXT: 1 3 1.00 clastb z0.d, p7, z0.d, z31.d
-# CHECK-NEXT: 1 3 1.00 clastb z0.h, p7, z0.h, z31.h
-# CHECK-NEXT: 1 3 1.00 clastb z0.s, p7, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 cls z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 cls z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 cls z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 cls z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 0.50 clz z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 clz z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 clz z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 clz z31.s, p7/m, z31.s
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpeq p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmpge p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: 2 4 1.00 cmpge p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: 2 4 1.00 cmpge p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmpge p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpge p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmpge p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: 2 4 1.00 cmpge p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: 2 4 1.00 cmpge p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpge p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpge p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: 2 4 1.00 cmpge p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: 2 4 1.00 cmpge p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpge p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmpge p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmpge p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: 2 4 1.00 cmpge p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: 2 4 1.00 cmpge p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpge p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmpge p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmpgt p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmphi p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: 2 4 1.00 cmphi p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: 2 4 1.00 cmphi p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmphi p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphi p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmphi p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: 2 4 1.00 cmphi p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: 2 4 1.00 cmphi p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphi p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphi p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: 2 4 1.00 cmphi p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: 2 4 1.00 cmphi p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphi p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmphi p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmphi p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: 2 4 1.00 cmphi p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: 2 4 1.00 cmphi p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphi p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmphi p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmphs p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: 2 4 1.00 cmphs p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: 2 4 1.00 cmphs p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmphs p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphs p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmphs p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: 2 4 1.00 cmphs p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: 2 4 1.00 cmphs p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphs p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphs p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: 2 4 1.00 cmphs p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: 2 4 1.00 cmphs p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphs p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmphs p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmphs p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: 2 4 1.00 cmphs p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: 2 4 1.00 cmphs p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmphs p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmphs p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 2 4 1.00 cmple p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: 2 4 1.00 cmple p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: 2 4 1.00 cmple p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmple p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: 2 4 1.00 cmple p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: 2 4 1.00 cmple p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: 2 4 1.00 cmple p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: 2 4 1.00 cmple p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmple p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: 2 4 1.00 cmple p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: 2 4 1.00 cmple p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmplo p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: 2 4 1.00 cmplo p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: 2 4 1.00 cmplo p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmplo p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: 2 4 1.00 cmplo p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: 2 4 1.00 cmplo p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: 2 4 1.00 cmplo p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: 2 4 1.00 cmplo p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmplo p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: 2 4 1.00 cmplo p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: 2 4 1.00 cmplo p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpls p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: 2 4 1.00 cmpls p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: 2 4 1.00 cmpls p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpls p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: 2 4 1.00 cmpls p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: 2 4 1.00 cmpls p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: 2 4 1.00 cmpls p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: 2 4 1.00 cmpls p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpls p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: 2 4 1.00 cmpls p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: 2 4 1.00 cmpls p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmplt p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: 2 4 1.00 cmplt p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: 2 4 1.00 cmplt p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmplt p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: 2 4 1.00 cmplt p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: 2 4 1.00 cmplt p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: 2 4 1.00 cmplt p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: 2 4 1.00 cmplt p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmplt p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: 2 4 1.00 cmplt p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: 2 4 1.00 cmplt p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpne p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: 2 4 1.00 cmpne p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: 2 4 1.00 cmpne p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: 2 4 1.00 cmpne p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpne p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: 2 4 1.00 cmpne p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: 2 4 1.00 cmpne p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpne p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: 2 4 1.00 cmpne p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: 2 4 1.00 cmpne p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpne p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: 2 4 1.00 cmpne p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: 2 4 1.00 cmpne p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: 2 4 1.00 cmpne p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: 2 4 1.00 cmpne p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 cnot z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 cnot z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 cnot z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 cnot z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 0.50 cnt z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 cnt z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 cnt z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 cnt z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 clasta b0, p7, b0, z31.b
+# CHECK-NEXT: 2 3 1.00 clasta d0, p7, d0, z31.d
+# CHECK-NEXT: 2 3 1.00 clasta h0, p7, h0, z31.h
+# CHECK-NEXT: 2 3 1.00 clasta s0, p7, s0, z31.s
+# CHECK-NEXT: 3 9 1.00 clasta w0, p7, w0, z31.b
+# CHECK-NEXT: 3 9 1.00 clasta w0, p7, w0, z31.h
+# CHECK-NEXT: 3 9 1.00 clasta w0, p7, w0, z31.s
+# CHECK-NEXT: 3 9 1.00 clasta x0, p7, x0, z31.d
+# CHECK-NEXT: 2 3 1.00 clasta z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: 2 3 1.00 clasta z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: 2 3 1.00 clasta z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: 2 3 1.00 clasta z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: 2 3 1.00 clastb b0, p7, b0, z31.b
+# CHECK-NEXT: 2 3 1.00 clastb d0, p7, d0, z31.d
+# CHECK-NEXT: 2 3 1.00 clastb h0, p7, h0, z31.h
+# CHECK-NEXT: 2 3 1.00 clastb s0, p7, s0, z31.s
+# CHECK-NEXT: 3 9 1.00 clastb w0, p7, w0, z31.b
+# CHECK-NEXT: 3 9 1.00 clastb w0, p7, w0, z31.h
+# CHECK-NEXT: 3 9 1.00 clastb w0, p7, w0, z31.s
+# CHECK-NEXT: 3 9 1.00 clastb x0, p7, x0, z31.d
+# CHECK-NEXT: 2 3 1.00 clastb z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: 2 3 1.00 clastb z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: 2 3 1.00 clastb z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: 2 3 1.00 clastb z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 cls z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 cls z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 cls z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 cls z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 clz z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 clz z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 clz z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 clz z31.s, p7/m, z31.s
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpeq p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmpge p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 3 4 1.00 cmpge p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 3 4 1.00 cmpge p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmpge p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpge p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmpge p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 3 4 1.00 cmpge p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 3 4 1.00 cmpge p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpge p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 3 4 1.00 cmpge p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 3 4 1.00 cmpge p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpge p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmpge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmpge p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 3 4 1.00 cmpge p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 3 4 1.00 cmpge p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpge p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmpge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmpgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmphi p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 3 4 1.00 cmphi p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 3 4 1.00 cmphi p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmphi p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphi p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmphi p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 3 4 1.00 cmphi p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 3 4 1.00 cmphi p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphi p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphi p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 3 4 1.00 cmphi p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 3 4 1.00 cmphi p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphi p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmphi p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmphi p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 3 4 1.00 cmphi p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 3 4 1.00 cmphi p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphi p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmphi p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmphs p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 3 4 1.00 cmphs p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 3 4 1.00 cmphs p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmphs p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphs p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmphs p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 3 4 1.00 cmphs p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 3 4 1.00 cmphs p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphs p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphs p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 3 4 1.00 cmphs p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 3 4 1.00 cmphs p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphs p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmphs p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmphs p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 3 4 1.00 cmphs p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 3 4 1.00 cmphs p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmphs p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmphs p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 3 4 1.00 cmple p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 3 4 1.00 cmple p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 3 4 1.00 cmple p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmple p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 3 4 1.00 cmple p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 3 4 1.00 cmple p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 3 4 1.00 cmple p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 3 4 1.00 cmple p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmple p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 3 4 1.00 cmple p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 3 4 1.00 cmple p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmplo p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 3 4 1.00 cmplo p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 3 4 1.00 cmplo p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmplo p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 3 4 1.00 cmplo p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 3 4 1.00 cmplo p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 3 4 1.00 cmplo p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 3 4 1.00 cmplo p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmplo p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 3 4 1.00 cmplo p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 3 4 1.00 cmplo p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpls p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 3 4 1.00 cmpls p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 3 4 1.00 cmpls p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpls p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 3 4 1.00 cmpls p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 3 4 1.00 cmpls p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 3 4 1.00 cmpls p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 3 4 1.00 cmpls p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpls p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 3 4 1.00 cmpls p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 3 4 1.00 cmpls p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmplt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 3 4 1.00 cmplt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 3 4 1.00 cmplt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmplt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 3 4 1.00 cmplt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 3 4 1.00 cmplt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 3 4 1.00 cmplt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 3 4 1.00 cmplt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmplt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 3 4 1.00 cmplt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 3 4 1.00 cmplt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpne p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 3 4 1.00 cmpne p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 3 4 1.00 cmpne p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 3 4 1.00 cmpne p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpne p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 3 4 1.00 cmpne p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 3 4 1.00 cmpne p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpne p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 3 4 1.00 cmpne p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 3 4 1.00 cmpne p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpne p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 3 4 1.00 cmpne p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 3 4 1.00 cmpne p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 3 4 1.00 cmpne p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 3 4 1.00 cmpne p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 cnot z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 cnot z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 cnot z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 cnot z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 cnt z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 cnt z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 cnt z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 cnt z31.s, p7/m, z31.s
# CHECK-NEXT: 1 2 1.00 cntb x0
# CHECK-NEXT: 1 2 1.00 cntb x0, #28
# CHECK-NEXT: 1 2 1.00 cntb x0, all, mul #16
@@ -2803,12 +2803,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 cntw x0, #28
# CHECK-NEXT: 1 2 1.00 cntw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 cntw x0, pow2
-# CHECK-NEXT: 1 3 1.00 compact z31.d, p7, z31.d
-# CHECK-NEXT: 1 3 1.00 compact z31.s, p7, z31.s
-# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, w0
-# CHECK-NEXT: 2 5 1.00 mov z31.d, p7/m, sp
-# CHECK-NEXT: 2 5 1.00 mov z31.h, p7/m, w0
-# CHECK-NEXT: 2 5 1.00 mov z31.s, p7/m, wsp
+# CHECK-NEXT: 2 3 1.00 compact z31.d, p7, z31.d
+# CHECK-NEXT: 2 3 1.00 compact z31.s, p7, z31.s
+# CHECK-NEXT: 3 5 1.00 mov z31.b, p7/m, w0
+# CHECK-NEXT: 3 5 1.00 mov z31.d, p7/m, sp
+# CHECK-NEXT: 3 5 1.00 mov z31.h, p7/m, w0
+# CHECK-NEXT: 3 5 1.00 mov z31.s, p7/m, wsp
# CHECK-NEXT: 1 1 1.00 ctermeq w30, wzr
# CHECK-NEXT: 1 1 1.00 ctermeq wzr, w30
# CHECK-NEXT: 1 1 1.00 ctermeq x30, xzr
@@ -2840,356 +2840,356 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 decp xzr, p15.d
# CHECK-NEXT: 1 2 1.00 decp xzr, p15.h
# CHECK-NEXT: 1 2 1.00 decp xzr, p15.s
-# CHECK-NEXT: 2 7 2.00 decp z31.d, p15.d
-# CHECK-NEXT: 2 7 2.00 decp z31.h, p15.h
-# CHECK-NEXT: 2 7 2.00 decp z31.s, p15.s
+# CHECK-NEXT: 3 7 2.00 decp z31.d, p15.d
+# CHECK-NEXT: 3 7 2.00 decp z31.h, p15.h
+# CHECK-NEXT: 3 7 2.00 decp z31.s, p15.s
# CHECK-NEXT: 1 2 1.00 decw x0
# CHECK-NEXT: 1 2 1.00 decw x0, #14
# CHECK-NEXT: 1 2 1.00 decw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 decw x0, pow2
# CHECK-NEXT: 1 2 1.00 decw x0, vl1
-# CHECK-NEXT: 1 2 0.50 mov z0.b, #0
-# CHECK-NEXT: 1 2 0.50 mov z0.d, #256
-# CHECK-NEXT: 1 2 0.50 mov z31.h, #127
-# CHECK-NEXT: 1 2 0.50 mov z31.s, #512
+# CHECK-NEXT: 2 2 0.50 mov z0.b, #0
+# CHECK-NEXT: 2 2 0.50 mov z0.d, #256
+# CHECK-NEXT: 2 2 0.50 mov z31.h, #127
+# CHECK-NEXT: 2 2 0.50 mov z31.s, #512
# CHECK-NEXT: 1 3 1.00 mov z0.b, w0
# CHECK-NEXT: 1 3 1.00 mov z0.d, x0
# CHECK-NEXT: 1 3 1.00 mov z31.h, wsp
# CHECK-NEXT: 1 3 1.00 mov z31.s, wsp
-# CHECK-NEXT: 1 2 0.50 dupm z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: 1 2 0.50 dupm z0.s, #0xfffffff9
-# CHECK-NEXT: 1 2 0.50 dupm z23.h, #0xfff9
-# CHECK-NEXT: 1 2 0.50 dupm z5.b, #0xf9
+# CHECK-NEXT: 2 2 0.50 dupm z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 2 2 0.50 dupm z0.s, #0xfffffff9
+# CHECK-NEXT: 2 2 0.50 dupm z23.h, #0xfff9
+# CHECK-NEXT: 2 2 0.50 dupm z5.b, #0xf9
# CHECK-NEXT: 1 1 1.00 eor p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 1 2 0.50 eor z0.d, z0.d, #0x6
-# CHECK-NEXT: 1 2 0.50 eor z0.d, z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: 1 2 0.50 eor z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 eor z0.s, z0.s, #0x6
-# CHECK-NEXT: 1 2 0.50 eor z0.s, z0.s, #0xfffffff9
-# CHECK-NEXT: 1 2 0.50 eor z23.d, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 eor z23.h, z23.h, #0x6
-# CHECK-NEXT: 1 2 0.50 eor z23.h, z23.h, #0xfff9
-# CHECK-NEXT: 1 2 0.50 eor z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 eor z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 eor z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 eor z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 eor z5.b, z5.b, #0x6
-# CHECK-NEXT: 1 2 0.50 eor z5.b, z5.b, #0xf9
+# CHECK-NEXT: 2 2 0.50 eor z0.d, z0.d, #0x6
+# CHECK-NEXT: 2 2 0.50 eor z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 2 2 0.50 eor z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 eor z0.s, z0.s, #0x6
+# CHECK-NEXT: 2 2 0.50 eor z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 2 2 0.50 eor z23.d, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 eor z23.h, z23.h, #0x6
+# CHECK-NEXT: 2 2 0.50 eor z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 2 2 0.50 eor z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 eor z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 eor z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 eor z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 eor z5.b, z5.b, #0x6
+# CHECK-NEXT: 2 2 0.50 eor z5.b, z5.b, #0xf9
# CHECK-NEXT: 1 2 2.00 eors p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 1 12 2.00 eorv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 eorv d0, p7, z31.d
-# CHECK-NEXT: 1 12 2.00 eorv h0, p7, z31.h
-# CHECK-NEXT: 1 12 2.00 eorv s0, p7, z31.s
-# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #255
-# CHECK-NEXT: 1 2 0.50 fabd z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fabd z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fabd z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fabs z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 fabs z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 fabs z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 facge p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 facge p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 1 2 1.00 facge p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 facge p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 1 2 1.00 facge p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 2 1.00 facge p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 1 2 1.00 facgt p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 facgt p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 1 2 1.00 facgt p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 facgt p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 1 2 1.00 facgt p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 2 1.00 facgt p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 1 2 0.50 fadd z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: 1 2 0.50 fadd z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fadd z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fadd z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: 1 2 0.50 fadd z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fadd z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fadd z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: 1 2 0.50 fadd z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fadd z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fadd z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fadd z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fadd z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 8 1.50 fadda d0, p7, d0, z31.d
-# CHECK-NEXT: 1 19 18.00 fadda h0, p7, h0, z31.h
-# CHECK-NEXT: 1 11 10.00 fadda s0, p7, s0, z31.s
-# CHECK-NEXT: 1 9 2.00 faddv d0, p7, z31.d
-# CHECK-NEXT: 1 13 3.00 faddv h0, p7, z31.h
-# CHECK-NEXT: 1 11 2.50 faddv s0, p7, z31.s
-# CHECK-NEXT: 1 3 0.50 fcadd z0.d, p0/m, z0.d, z0.d, #90
-# CHECK-NEXT: 1 3 0.50 fcadd z0.h, p0/m, z0.h, z0.h, #90
-# CHECK-NEXT: 1 3 0.50 fcadd z0.s, p0/m, z0.s, z0.s, #90
-# CHECK-NEXT: 1 3 0.50 fcadd z31.d, p7/m, z31.d, z31.d, #270
-# CHECK-NEXT: 1 3 0.50 fcadd z31.h, p7/m, z31.h, z31.h, #270
-# CHECK-NEXT: 1 3 0.50 fcadd z31.s, p7/m, z31.s, z31.s, #270
-# CHECK-NEXT: 1 2 1.00 fcmeq p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmeq p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 fcmeq p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmeq p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 fcmeq p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmeq p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: 1 5 0.50 fcmla z0.d, p0/m, z0.d, z0.d, #0
-# CHECK-NEXT: 1 5 0.50 fcmla z0.d, p0/m, z1.d, z2.d, #90
-# CHECK-NEXT: 1 5 0.50 fcmla z0.h, p0/m, z0.h, z0.h, #0
-# CHECK-NEXT: 1 5 0.50 fcmla z0.h, p0/m, z1.h, z2.h, #90
-# CHECK-NEXT: 1 5 0.50 fcmla z0.h, z0.h, z0.h[0], #0
-# CHECK-NEXT: 1 5 0.50 fcmla z0.s, p0/m, z0.s, z0.s, #0
-# CHECK-NEXT: 1 5 0.50 fcmla z0.s, p0/m, z1.s, z2.s, #90
-# CHECK-NEXT: 1 5 0.50 fcmla z21.s, z10.s, z5.s[1], #90
-# CHECK-NEXT: 1 5 0.50 fcmla z23.s, z13.s, z8.s[0], #270
-# CHECK-NEXT: 1 5 0.50 fcmla z29.d, p7/m, z30.d, z31.d, #180
-# CHECK-NEXT: 1 5 0.50 fcmla z29.h, p7/m, z30.h, z31.h, #180
-# CHECK-NEXT: 1 5 0.50 fcmla z29.s, p7/m, z30.s, z31.s, #180
-# CHECK-NEXT: 1 5 0.50 fcmla z31.d, p7/m, z31.d, z31.d, #270
-# CHECK-NEXT: 1 5 0.50 fcmla z31.h, p7/m, z31.h, z31.h, #270
-# CHECK-NEXT: 1 5 0.50 fcmla z31.h, z31.h, z7.h[3], #270
-# CHECK-NEXT: 1 5 0.50 fcmla z31.s, p7/m, z31.s, z31.s, #270
-# CHECK-NEXT: 1 2 1.00 fcmle p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmle p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmle p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmlt p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmlt p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmlt p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmne p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmne p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 fcmne p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmne p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 fcmne p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: 1 2 1.00 fcmne p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 2 1.00 fcmuo p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: 1 2 1.00 fcmuo p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: 1 2 1.00 fcmuo p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: 1 3 1.00 fcvt z0.d, p0/m, z0.h
-# CHECK-NEXT: 1 3 1.00 fcvt z0.d, p0/m, z0.s
-# CHECK-NEXT: 1 3 1.00 fcvt z0.h, p0/m, z0.d
-# CHECK-NEXT: 1 4 2.00 fcvt z0.h, p0/m, z0.s
-# CHECK-NEXT: 1 3 1.00 fcvt z0.s, p0/m, z0.d
-# CHECK-NEXT: 1 4 2.00 fcvt z0.s, p0/m, z0.h
-# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 6 4.00 fcvtzs z0.d, p0/m, z0.h
-# CHECK-NEXT: 1 4 2.00 fcvtzs z0.d, p0/m, z0.s
-# CHECK-NEXT: 1 6 4.00 fcvtzs z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 3 1.00 fcvtzs z0.s, p0/m, z0.d
-# CHECK-NEXT: 1 6 4.00 fcvtzs z0.s, p0/m, z0.h
-# CHECK-NEXT: 1 4 2.00 fcvtzs z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 6 4.00 fcvtzu z0.d, p0/m, z0.h
-# CHECK-NEXT: 1 4 2.00 fcvtzu z0.d, p0/m, z0.s
-# CHECK-NEXT: 1 6 4.00 fcvtzu z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 3 1.00 fcvtzu z0.s, p0/m, z0.d
-# CHECK-NEXT: 1 6 4.00 fcvtzu z0.s, p0/m, z0.h
-# CHECK-NEXT: 1 4 2.00 fcvtzu z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 15 14.00 fdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 13 12.00 fdiv z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 10 9.00 fdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 15 14.00 fdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 13 12.00 fdivr z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 10 9.00 fdivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 3 0.50 fexpa z0.d, z31.d
-# CHECK-NEXT: 1 3 0.50 fexpa z0.h, z31.h
-# CHECK-NEXT: 1 3 0.50 fexpa z0.s, z31.s
-# CHECK-NEXT: 1 4 0.50 fmad z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fmad z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fmad z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fmax z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: 1 2 0.50 fmax z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fmax z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: 1 2 0.50 fmax z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fmax z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: 1 2 0.50 fmax z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fmax z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fmax z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fmax z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 2 0.50 fmaxnm z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: 1 2 0.50 fmaxnm z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fmaxnm z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: 1 2 0.50 fmaxnm z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fmaxnm z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: 1 2 0.50 fmaxnm z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fmaxnm z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fmaxnm z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fmaxnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 9 2.00 fmaxnmv d0, p7, z31.d
-# CHECK-NEXT: 1 13 3.00 fmaxnmv h0, p7, z31.h
-# CHECK-NEXT: 1 11 2.50 fmaxnmv s0, p7, z31.s
-# CHECK-NEXT: 1 9 2.00 fmaxv d0, p7, z31.d
-# CHECK-NEXT: 1 13 3.00 fmaxv h0, p7, z31.h
-# CHECK-NEXT: 1 11 2.50 fmaxv s0, p7, z31.s
-# CHECK-NEXT: 1 2 0.50 fmin z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: 1 2 0.50 fmin z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fmin z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: 1 2 0.50 fmin z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fmin z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: 1 2 0.50 fmin z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fmin z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fmin z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fmin z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 2 0.50 fminnm z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: 1 2 0.50 fminnm z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fminnm z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: 1 2 0.50 fminnm z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fminnm z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: 1 2 0.50 fminnm z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fminnm z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fminnm z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fminnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 9 2.00 fminnmv d0, p7, z31.d
-# CHECK-NEXT: 1 13 3.00 fminnmv h0, p7, z31.h
-# CHECK-NEXT: 1 11 2.50 fminnmv s0, p7, z31.s
-# CHECK-NEXT: 1 9 2.00 fminv d0, p7, z31.d
-# CHECK-NEXT: 1 13 3.00 fminv h0, p7, z31.h
-# CHECK-NEXT: 1 11 2.50 fminv s0, p7, z31.s
-# CHECK-NEXT: 1 4 0.50 fmla z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fmla z0.d, z1.d, z7.d[1]
-# CHECK-NEXT: 1 4 0.50 fmla z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fmla z0.h, z1.h, z7.h[7]
-# CHECK-NEXT: 1 4 0.50 fmla z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 4 0.50 fmla z0.s, z1.s, z7.s[3]
-# CHECK-NEXT: 1 4 0.50 fmls z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fmls z0.d, z1.d, z7.d[1]
-# CHECK-NEXT: 1 4 0.50 fmls z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fmls z0.h, z1.h, z7.h[7]
-# CHECK-NEXT: 1 4 0.50 fmls z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 4 0.50 fmls z0.s, z1.s, z7.s[3]
-# CHECK-NEXT: 1 2 0.50 fmov z0.d, #-10.00000000
-# CHECK-NEXT: 1 2 0.50 fmov z0.d, #0.12500000
-# CHECK-NEXT: 1 2 0.50 fmov z0.d, p0/m, #-10.00000000
-# CHECK-NEXT: 1 2 0.50 fmov z0.d, p0/m, #0.12500000
-# CHECK-NEXT: 1 2 0.50 fmov z0.h, #-0.12500000
-# CHECK-NEXT: 1 2 0.50 fmov z0.h, p0/m, #-0.12500000
-# CHECK-NEXT: 1 2 0.50 fmov z0.s, #-0.12500000
-# CHECK-NEXT: 1 2 0.50 fmov z0.s, p0/m, #-0.12500000
-# CHECK-NEXT: 1 4 0.50 fmsb z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fmsb z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fmsb z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 3 0.50 fmul z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: 1 3 0.50 fmul z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 3 0.50 fmul z0.d, z0.d, z0.d[0]
-# CHECK-NEXT: 1 3 0.50 fmul z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 3 0.50 fmul z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: 1 3 0.50 fmul z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 3 0.50 fmul z0.h, z0.h, z0.h[0]
-# CHECK-NEXT: 1 3 0.50 fmul z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 3 0.50 fmul z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: 1 3 0.50 fmul z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 3 0.50 fmul z0.s, z0.s, z0.s[0]
-# CHECK-NEXT: 1 3 0.50 fmul z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 3 0.50 fmul z31.d, p7/m, z31.d, #2.0
-# CHECK-NEXT: 1 3 0.50 fmul z31.d, z31.d, z15.d[1]
-# CHECK-NEXT: 1 3 0.50 fmul z31.h, p7/m, z31.h, #2.0
-# CHECK-NEXT: 1 3 0.50 fmul z31.h, z31.h, z7.h[7]
-# CHECK-NEXT: 1 3 0.50 fmul z31.s, p7/m, z31.s, #2.0
-# CHECK-NEXT: 1 3 0.50 fmul z31.s, z31.s, z7.s[3]
-# CHECK-NEXT: 1 3 0.50 fmulx z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 3 0.50 fmulx z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 3 0.50 fmulx z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fneg z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 fneg z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 fneg z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 4 0.50 fnmad z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fnmad z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fnmad z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 4 0.50 fnmla z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fnmla z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fnmla z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 4 0.50 fnmls z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fnmls z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fnmls z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 4 0.50 fnmsb z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 fnmsb z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 fnmsb z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 3 1.00 frecpe z0.d, z31.d
-# CHECK-NEXT: 1 6 1.00 frecpe z0.h, z31.h
-# CHECK-NEXT: 1 4 1.00 frecpe z0.s, z31.s
-# CHECK-NEXT: 1 4 0.50 frecps z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 frecps z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 frecps z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 3 1.00 frecpx z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 3 1.00 frecpx z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 3 1.00 frecpx z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frinta z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frinta z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frinta z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frinti z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frinti z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frinti z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frintm z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frintm z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frintm z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frintn z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frintn z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frintn z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frintp z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frintp z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frintp z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frintx z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frintx z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frintx z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frintz z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 6 1.00 frintz z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 4 1.00 frintz z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 frsqrte z0.d, z31.d
-# CHECK-NEXT: 1 6 1.00 frsqrte z0.h, z31.h
-# CHECK-NEXT: 1 4 1.00 frsqrte z0.s, z31.s
-# CHECK-NEXT: 1 4 0.50 frsqrts z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 4 0.50 frsqrts z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 4 0.50 frsqrts z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 3 0.50 fscale z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 3 0.50 fscale z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 3 0.50 fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 16 14.00 fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 13 12.00 fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 10 9.00 fsqrt z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 0.50 fsub z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: 1 2 0.50 fsub z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fsub z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fsub z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: 1 2 0.50 fsub z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fsub z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fsub z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: 1 2 0.50 fsub z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fsub z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fsub z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fsub z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fsub z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 2 0.50 fsubr z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: 1 2 0.50 fsubr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 fsubr z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: 1 2 0.50 fsubr z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 fsubr z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: 1 2 0.50 fsubr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 fsubr z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: 1 2 0.50 fsubr z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: 1 2 0.50 fsubr z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: 1 3 0.50 ftmad z0.d, z0.d, z31.d, #7
-# CHECK-NEXT: 1 3 0.50 ftmad z0.h, z0.h, z31.h, #7
-# CHECK-NEXT: 1 3 0.50 ftmad z0.s, z0.s, z31.s, #7
-# CHECK-NEXT: 1 3 0.50 ftsmul z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 3 0.50 ftsmul z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 3 0.50 ftsmul z0.s, z1.s, z31.s
-# CHECK-NEXT: 1 3 0.50 ftssel z0.d, z1.d, z31.d
-# CHECK-NEXT: 1 3 0.50 ftssel z0.h, z1.h, z31.h
-# CHECK-NEXT: 1 3 0.50 ftssel z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 12 2.00 eorv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 eorv d0, p7, z31.d
+# CHECK-NEXT: 2 12 2.00 eorv h0, p7, z31.h
+# CHECK-NEXT: 2 12 2.00 eorv s0, p7, z31.s
+# CHECK-NEXT: 2 2 0.50 ext z31.b, z31.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 ext z31.b, z31.b, z0.b, #255
+# CHECK-NEXT: 2 2 0.50 fabd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fabd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fabd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fabs z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 fabs z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 fabs z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 1.00 facge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 facge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 2 2 1.00 facge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 facge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 2 2 1.00 facge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 2 1.00 facge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 2 2 1.00 facgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 facgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 2 2 1.00 facgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 facgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 2 2 1.00 facgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 2 1.00 facgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 2 2 0.50 fadd z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 2 2 0.50 fadd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fadd z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fadd z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 2 2 0.50 fadd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fadd z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fadd z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 2 2 0.50 fadd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fadd z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fadd z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fadd z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fadd z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 8 1.50 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: 2 19 18.00 fadda h0, p7, h0, z31.h
+# CHECK-NEXT: 2 11 10.00 fadda s0, p7, s0, z31.s
+# CHECK-NEXT: 2 9 2.00 faddv d0, p7, z31.d
+# CHECK-NEXT: 2 13 3.00 faddv h0, p7, z31.h
+# CHECK-NEXT: 2 11 2.50 faddv s0, p7, z31.s
+# CHECK-NEXT: 2 3 0.50 fcadd z0.d, p0/m, z0.d, z0.d, #90
+# CHECK-NEXT: 2 3 0.50 fcadd z0.h, p0/m, z0.h, z0.h, #90
+# CHECK-NEXT: 2 3 0.50 fcadd z0.s, p0/m, z0.s, z0.s, #90
+# CHECK-NEXT: 2 3 0.50 fcadd z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: 2 3 0.50 fcadd z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: 2 3 0.50 fcadd z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: 2 2 1.00 fcmeq p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmeq p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 fcmeq p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmeq p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 fcmeq p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmeq p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 2 1.00 fcmge p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 fcmge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 2 2 1.00 fcmge p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 fcmge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 2 2 1.00 fcmge p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 2 1.00 fcmge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 2 1.00 fcmgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 2 5 0.50 fcmla z0.d, p0/m, z0.d, z0.d, #0
+# CHECK-NEXT: 2 5 0.50 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 2 5 0.50 fcmla z0.h, p0/m, z0.h, z0.h, #0
+# CHECK-NEXT: 2 5 0.50 fcmla z0.h, p0/m, z1.h, z2.h, #90
+# CHECK-NEXT: 2 5 0.50 fcmla z0.h, z0.h, z0.h[0], #0
+# CHECK-NEXT: 2 5 0.50 fcmla z0.s, p0/m, z0.s, z0.s, #0
+# CHECK-NEXT: 2 5 0.50 fcmla z0.s, p0/m, z1.s, z2.s, #90
+# CHECK-NEXT: 2 5 0.50 fcmla z21.s, z10.s, z5.s[1], #90
+# CHECK-NEXT: 2 5 0.50 fcmla z23.s, z13.s, z8.s[0], #270
+# CHECK-NEXT: 2 5 0.50 fcmla z29.d, p7/m, z30.d, z31.d, #180
+# CHECK-NEXT: 2 5 0.50 fcmla z29.h, p7/m, z30.h, z31.h, #180
+# CHECK-NEXT: 2 5 0.50 fcmla z29.s, p7/m, z30.s, z31.s, #180
+# CHECK-NEXT: 2 5 0.50 fcmla z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: 2 5 0.50 fcmla z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: 2 5 0.50 fcmla z31.h, z31.h, z7.h[3], #270
+# CHECK-NEXT: 2 5 0.50 fcmla z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: 2 2 1.00 fcmle p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmle p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmle p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmlt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmlt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmlt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmne p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmne p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 fcmne p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmne p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 fcmne p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 2 2 1.00 fcmne p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 2 1.00 fcmuo p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 2 2 1.00 fcmuo p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 2 2 1.00 fcmuo p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 2 3 1.00 fcvt z0.d, p0/m, z0.h
+# CHECK-NEXT: 2 3 1.00 fcvt z0.d, p0/m, z0.s
+# CHECK-NEXT: 2 3 1.00 fcvt z0.h, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: 2 3 1.00 fcvt z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 3 1.00 fcvtzs z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 6 4.00 fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: 2 6 4.00 fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 3 1.00 fcvtzs z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 6 4.00 fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 3 1.00 fcvtzu z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 6 4.00 fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: 2 6 4.00 fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 3 1.00 fcvtzu z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 6 4.00 fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 15 14.00 fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 13 12.00 fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 10 9.00 fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 15 14.00 fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 13 12.00 fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 10 9.00 fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 3 0.50 fexpa z0.d, z31.d
+# CHECK-NEXT: 2 3 0.50 fexpa z0.h, z31.h
+# CHECK-NEXT: 2 3 0.50 fexpa z0.s, z31.s
+# CHECK-NEXT: 2 4 0.50 fmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fmax z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 2 2 0.50 fmax z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fmax z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 2 2 0.50 fmax z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fmax z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 2 2 0.50 fmax z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fmax z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fmax z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fmax z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 2 0.50 fmaxnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 2 2 0.50 fmaxnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fmaxnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 2 2 0.50 fmaxnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fmaxnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 2 2 0.50 fmaxnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fmaxnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fmaxnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fmaxnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 9 2.00 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: 2 13 3.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: 2 11 2.50 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: 2 9 2.00 fmaxv d0, p7, z31.d
+# CHECK-NEXT: 2 13 3.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: 2 11 2.50 fmaxv s0, p7, z31.s
+# CHECK-NEXT: 2 2 0.50 fmin z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 2 2 0.50 fmin z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fmin z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 2 2 0.50 fmin z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fmin z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 2 2 0.50 fmin z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fmin z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fmin z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fmin z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 2 0.50 fminnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 2 2 0.50 fminnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fminnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 2 2 0.50 fminnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fminnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 2 2 0.50 fminnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fminnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fminnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fminnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 9 2.00 fminnmv d0, p7, z31.d
+# CHECK-NEXT: 2 13 3.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: 2 11 2.50 fminnmv s0, p7, z31.s
+# CHECK-NEXT: 2 9 2.00 fminv d0, p7, z31.d
+# CHECK-NEXT: 2 13 3.00 fminv h0, p7, z31.h
+# CHECK-NEXT: 2 11 2.50 fminv s0, p7, z31.s
+# CHECK-NEXT: 2 4 0.50 fmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fmla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 2 4 0.50 fmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fmla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 2 4 0.50 fmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 4 0.50 fmla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 2 4 0.50 fmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fmls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 2 4 0.50 fmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fmls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 2 4 0.50 fmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 4 0.50 fmls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 2 2 0.50 fmov z0.d, #-10.00000000
+# CHECK-NEXT: 2 2 0.50 fmov z0.d, #0.12500000
+# CHECK-NEXT: 2 2 0.50 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: 2 2 0.50 fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: 2 2 0.50 fmov z0.h, #-0.12500000
+# CHECK-NEXT: 2 2 0.50 fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: 2 2 0.50 fmov z0.s, #-0.12500000
+# CHECK-NEXT: 2 2 0.50 fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: 2 4 0.50 fmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 3 0.50 fmul z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 2 3 0.50 fmul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 3 0.50 fmul z0.d, z0.d, z0.d[0]
+# CHECK-NEXT: 2 3 0.50 fmul z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 3 0.50 fmul z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 2 3 0.50 fmul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 3 0.50 fmul z0.h, z0.h, z0.h[0]
+# CHECK-NEXT: 2 3 0.50 fmul z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 3 0.50 fmul z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 2 3 0.50 fmul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 3 0.50 fmul z0.s, z0.s, z0.s[0]
+# CHECK-NEXT: 2 3 0.50 fmul z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 3 0.50 fmul z31.d, p7/m, z31.d, #2.0
+# CHECK-NEXT: 2 3 0.50 fmul z31.d, z31.d, z15.d[1]
+# CHECK-NEXT: 2 3 0.50 fmul z31.h, p7/m, z31.h, #2.0
+# CHECK-NEXT: 2 3 0.50 fmul z31.h, z31.h, z7.h[7]
+# CHECK-NEXT: 2 3 0.50 fmul z31.s, p7/m, z31.s, #2.0
+# CHECK-NEXT: 2 3 0.50 fmul z31.s, z31.s, z7.s[3]
+# CHECK-NEXT: 2 3 0.50 fmulx z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 3 0.50 fmulx z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 3 0.50 fmulx z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fneg z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 fneg z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 fneg z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 4 0.50 fnmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fnmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fnmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 4 0.50 fnmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fnmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fnmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 4 0.50 fnmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fnmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fnmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 4 0.50 fnmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 fnmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 fnmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 3 1.00 frecpe z0.d, z31.d
+# CHECK-NEXT: 2 6 1.00 frecpe z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 frecpe z0.s, z31.s
+# CHECK-NEXT: 2 4 0.50 frecps z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 frecps z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 frecps z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 3 1.00 frecpx z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 3 1.00 frecpx z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 3 1.00 frecpx z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frinta z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frinta z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frinta z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frinti z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frinti z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frinti z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frintm z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frintm z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintm z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frintn z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frintn z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintn z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frintp z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frintp z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintp z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frintx z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frintx z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintx z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frintz z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 6 1.00 frintz z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintz z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 frsqrte z0.d, z31.d
+# CHECK-NEXT: 2 6 1.00 frsqrte z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 frsqrte z0.s, z31.s
+# CHECK-NEXT: 2 4 0.50 frsqrts z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 4 0.50 frsqrts z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 4 0.50 frsqrts z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 3 0.50 fscale z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 3 0.50 fscale z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 3 0.50 fscale z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 16 14.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 13 12.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 10 9.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 fsub z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 2 2 0.50 fsub z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fsub z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fsub z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 2 2 0.50 fsub z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fsub z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fsub z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 2 2 0.50 fsub z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fsub z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fsub z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fsub z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fsub z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 2 0.50 fsubr z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 2 2 0.50 fsubr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 fsubr z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 2 2 0.50 fsubr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 fsubr z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 2 2 0.50 fsubr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 fsubr z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 2 2 0.50 fsubr z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 2 2 0.50 fsubr z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 2 3 0.50 ftmad z0.d, z0.d, z31.d, #7
+# CHECK-NEXT: 2 3 0.50 ftmad z0.h, z0.h, z31.h, #7
+# CHECK-NEXT: 2 3 0.50 ftmad z0.s, z0.s, z31.s, #7
+# CHECK-NEXT: 2 3 0.50 ftsmul z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 3 0.50 ftsmul z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 3 0.50 ftsmul z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 3 0.50 ftssel z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 3 0.50 ftssel z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 3 0.50 ftssel z0.s, z1.s, z31.s
# CHECK-NEXT: 1 2 1.00 incb x0
# CHECK-NEXT: 1 2 1.00 incb x0, #14
# CHECK-NEXT: 1 2 1.00 incb x0, all, mul #16
@@ -3200,15 +3200,15 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 incd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 incd x0, pow2
# CHECK-NEXT: 1 2 1.00 incd x0, vl1
-# CHECK-NEXT: 1 2 0.50 incd z0.d
-# CHECK-NEXT: 1 2 0.50 incd z0.d, all, mul #16
+# CHECK-NEXT: 2 2 0.50 incd z0.d
+# CHECK-NEXT: 2 2 0.50 incd z0.d, all, mul #16
# CHECK-NEXT: 1 2 1.00 inch x0
# CHECK-NEXT: 1 2 1.00 inch x0, #14
# CHECK-NEXT: 1 2 1.00 inch x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 inch x0, pow2
# CHECK-NEXT: 1 2 1.00 inch x0, vl1
-# CHECK-NEXT: 1 2 0.50 inch z0.h
-# CHECK-NEXT: 1 2 0.50 inch z0.h, all, mul #16
+# CHECK-NEXT: 2 2 0.50 inch z0.h
+# CHECK-NEXT: 2 2 0.50 inch z0.h, all, mul #16
# CHECK-NEXT: 1 2 1.00 incp x0, p0.b
# CHECK-NEXT: 1 2 1.00 incp x0, p0.d
# CHECK-NEXT: 1 2 1.00 incp x0, p0.h
@@ -3217,76 +3217,76 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 incp xzr, p15.d
# CHECK-NEXT: 1 2 1.00 incp xzr, p15.h
# CHECK-NEXT: 1 2 1.00 incp xzr, p15.s
-# CHECK-NEXT: 2 7 2.00 incp z31.d, p15.d
-# CHECK-NEXT: 2 7 2.00 incp z31.h, p15.h
-# CHECK-NEXT: 2 7 2.00 incp z31.s, p15.s
+# CHECK-NEXT: 3 7 2.00 incp z31.d, p15.d
+# CHECK-NEXT: 3 7 2.00 incp z31.h, p15.h
+# CHECK-NEXT: 3 7 2.00 incp z31.s, p15.s
# CHECK-NEXT: 1 2 1.00 incw x0
# CHECK-NEXT: 1 2 1.00 incw x0, #14
# CHECK-NEXT: 1 2 1.00 incw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 incw x0, pow2
# CHECK-NEXT: 1 2 1.00 incw x0, vl1
-# CHECK-NEXT: 1 2 0.50 incw z0.s
-# CHECK-NEXT: 1 2 0.50 incw z0.s, all, mul #16
-# CHECK-NEXT: 1 4 1.00 index z0.b, #0, #0
-# CHECK-NEXT: 1 5 2.00 index z0.d, #0, #0
-# CHECK-NEXT: 1 4 1.00 index z0.h, #0, #0
-# CHECK-NEXT: 2 7 1.00 index z0.h, w0, w0
-# CHECK-NEXT: 1 4 1.00 index z0.s, #0, #0
-# CHECK-NEXT: 2 7 1.00 index z21.b, w10, w21
-# CHECK-NEXT: 2 8 2.00 index z21.d, x10, x21
-# CHECK-NEXT: 2 7 1.00 index z21.s, w10, w21
-# CHECK-NEXT: 2 7 1.00 index z23.b, #13, w8
-# CHECK-NEXT: 2 7 1.00 index z23.b, w13, #8
-# CHECK-NEXT: 2 8 2.00 index z23.d, #13, x8
-# CHECK-NEXT: 2 8 2.00 index z23.d, x13, #8
-# CHECK-NEXT: 2 7 1.00 index z23.h, #13, w8
-# CHECK-NEXT: 2 7 1.00 index z23.h, w13, #8
-# CHECK-NEXT: 2 7 1.00 index z23.s, #13, w8
-# CHECK-NEXT: 2 7 1.00 index z23.s, w13, #8
-# CHECK-NEXT: 1 4 1.00 index z31.b, #-1, #-1
-# CHECK-NEXT: 2 7 1.00 index z31.b, #-1, wzr
-# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, #-1
-# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, wzr
-# CHECK-NEXT: 1 5 2.00 index z31.d, #-1, #-1
-# CHECK-NEXT: 2 8 2.00 index z31.d, #-1, xzr
-# CHECK-NEXT: 2 8 2.00 index z31.d, xzr, #-1
-# CHECK-NEXT: 2 8 2.00 index z31.d, xzr, xzr
-# CHECK-NEXT: 1 4 1.00 index z31.h, #-1, #-1
-# CHECK-NEXT: 2 7 1.00 index z31.h, #-1, wzr
-# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, #-1
-# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, wzr
-# CHECK-NEXT: 1 4 1.00 index z31.s, #-1, #-1
-# CHECK-NEXT: 2 7 1.00 index z31.s, #-1, wzr
-# CHECK-NEXT: 2 7 1.00 index z31.s, wzr, #-1
-# CHECK-NEXT: 2 7 1.00 index z31.s, wzr, wzr
-# CHECK-NEXT: 2 6 1.00 insr z0.b, w0
-# CHECK-NEXT: 2 6 1.00 insr z0.d, x0
-# CHECK-NEXT: 2 6 1.00 insr z0.h, w0
-# CHECK-NEXT: 2 6 1.00 insr z0.s, w0
-# CHECK-NEXT: 1 3 1.00 insr z31.b, b31
-# CHECK-NEXT: 2 6 1.00 insr z31.b, wzr
-# CHECK-NEXT: 1 3 1.00 insr z31.d, d31
-# CHECK-NEXT: 2 6 1.00 insr z31.d, xzr
-# CHECK-NEXT: 1 3 1.00 insr z31.h, h31
-# CHECK-NEXT: 2 6 1.00 insr z31.h, wzr
-# CHECK-NEXT: 1 3 1.00 insr z31.s, s31
-# CHECK-NEXT: 2 6 1.00 insr z31.s, wzr
-# CHECK-NEXT: 1 3 1.00 lasta b0, p7, z31.b
-# CHECK-NEXT: 1 3 1.00 lasta d0, p7, z31.d
-# CHECK-NEXT: 1 3 1.00 lasta h0, p7, z31.h
-# CHECK-NEXT: 1 3 1.00 lasta s0, p7, z31.s
-# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.b
-# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.h
-# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.s
-# CHECK-NEXT: 2 6 1.00 lasta x0, p7, z31.d
-# CHECK-NEXT: 1 3 1.00 lastb b0, p7, z31.b
-# CHECK-NEXT: 1 3 1.00 lastb d0, p7, z31.d
-# CHECK-NEXT: 1 3 1.00 lastb h0, p7, z31.h
-# CHECK-NEXT: 1 3 1.00 lastb s0, p7, z31.s
-# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.b
-# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.h
-# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.s
-# CHECK-NEXT: 2 6 1.00 lastb x0, p7, z31.d
+# CHECK-NEXT: 2 2 0.50 incw z0.s
+# CHECK-NEXT: 2 2 0.50 incw z0.s, all, mul #16
+# CHECK-NEXT: 2 4 1.00 index z0.b, #0, #0
+# CHECK-NEXT: 2 5 2.00 index z0.d, #0, #0
+# CHECK-NEXT: 2 4 1.00 index z0.h, #0, #0
+# CHECK-NEXT: 3 7 1.00 index z0.h, w0, w0
+# CHECK-NEXT: 2 4 1.00 index z0.s, #0, #0
+# CHECK-NEXT: 3 7 1.00 index z21.b, w10, w21
+# CHECK-NEXT: 3 8 2.00 index z21.d, x10, x21
+# CHECK-NEXT: 3 7 1.00 index z21.s, w10, w21
+# CHECK-NEXT: 3 7 1.00 index z23.b, #13, w8
+# CHECK-NEXT: 3 7 1.00 index z23.b, w13, #8
+# CHECK-NEXT: 3 8 2.00 index z23.d, #13, x8
+# CHECK-NEXT: 3 8 2.00 index z23.d, x13, #8
+# CHECK-NEXT: 3 7 1.00 index z23.h, #13, w8
+# CHECK-NEXT: 3 7 1.00 index z23.h, w13, #8
+# CHECK-NEXT: 3 7 1.00 index z23.s, #13, w8
+# CHECK-NEXT: 3 7 1.00 index z23.s, w13, #8
+# CHECK-NEXT: 2 4 1.00 index z31.b, #-1, #-1
+# CHECK-NEXT: 3 7 1.00 index z31.b, #-1, wzr
+# CHECK-NEXT: 3 7 1.00 index z31.b, wzr, #-1
+# CHECK-NEXT: 3 7 1.00 index z31.b, wzr, wzr
+# CHECK-NEXT: 2 5 2.00 index z31.d, #-1, #-1
+# CHECK-NEXT: 3 8 2.00 index z31.d, #-1, xzr
+# CHECK-NEXT: 3 8 2.00 index z31.d, xzr, #-1
+# CHECK-NEXT: 3 8 2.00 index z31.d, xzr, xzr
+# CHECK-NEXT: 2 4 1.00 index z31.h, #-1, #-1
+# CHECK-NEXT: 3 7 1.00 index z31.h, #-1, wzr
+# CHECK-NEXT: 3 7 1.00 index z31.h, wzr, #-1
+# CHECK-NEXT: 3 7 1.00 index z31.h, wzr, wzr
+# CHECK-NEXT: 2 4 1.00 index z31.s, #-1, #-1
+# CHECK-NEXT: 3 7 1.00 index z31.s, #-1, wzr
+# CHECK-NEXT: 3 7 1.00 index z31.s, wzr, #-1
+# CHECK-NEXT: 3 7 1.00 index z31.s, wzr, wzr
+# CHECK-NEXT: 3 6 1.00 insr z0.b, w0
+# CHECK-NEXT: 3 6 1.00 insr z0.d, x0
+# CHECK-NEXT: 3 6 1.00 insr z0.h, w0
+# CHECK-NEXT: 3 6 1.00 insr z0.s, w0
+# CHECK-NEXT: 2 3 1.00 insr z31.b, b31
+# CHECK-NEXT: 3 6 1.00 insr z31.b, wzr
+# CHECK-NEXT: 2 3 1.00 insr z31.d, d31
+# CHECK-NEXT: 3 6 1.00 insr z31.d, xzr
+# CHECK-NEXT: 2 3 1.00 insr z31.h, h31
+# CHECK-NEXT: 3 6 1.00 insr z31.h, wzr
+# CHECK-NEXT: 2 3 1.00 insr z31.s, s31
+# CHECK-NEXT: 3 6 1.00 insr z31.s, wzr
+# CHECK-NEXT: 2 3 1.00 lasta b0, p7, z31.b
+# CHECK-NEXT: 2 3 1.00 lasta d0, p7, z31.d
+# CHECK-NEXT: 2 3 1.00 lasta h0, p7, z31.h
+# CHECK-NEXT: 2 3 1.00 lasta s0, p7, z31.s
+# CHECK-NEXT: 3 6 1.00 lasta w0, p7, z31.b
+# CHECK-NEXT: 3 6 1.00 lasta w0, p7, z31.h
+# CHECK-NEXT: 3 6 1.00 lasta w0, p7, z31.s
+# CHECK-NEXT: 3 6 1.00 lasta x0, p7, z31.d
+# CHECK-NEXT: 2 3 1.00 lastb b0, p7, z31.b
+# CHECK-NEXT: 2 3 1.00 lastb d0, p7, z31.d
+# CHECK-NEXT: 2 3 1.00 lastb h0, p7, z31.h
+# CHECK-NEXT: 2 3 1.00 lastb s0, p7, z31.s
+# CHECK-NEXT: 3 6 1.00 lastb w0, p7, z31.b
+# CHECK-NEXT: 3 6 1.00 lastb w0, p7, z31.h
+# CHECK-NEXT: 3 6 1.00 lastb w0, p7, z31.s
+# CHECK-NEXT: 3 6 1.00 lastb x0, p7, z31.d
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [sp, x0]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [x0, x0]
# CHECK-NEXT: 1 6 0.50 * ld1b { z0.b }, p0/z, [x0]
@@ -3485,66 +3485,66 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 11 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
# CHECK-NEXT: 2 11 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
# CHECK-NEXT: 2 11 4.00 * ld1w { z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT: 2 9 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 8 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT: 2 8 1.00 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 1.00 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 1.00 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 2 9 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 8 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT: 2 8 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 1.00 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 10 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 8 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT: 2 8 1.00 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 1.00 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 3 10 1.00 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 2 9 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 8 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT: 2 8 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: 2 8 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: 2 9 1.00 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 3 13 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 11 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT: 2 11 3.00 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 11 3.00 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 13 3.00 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 3 13 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 11 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT: 2 11 3.00 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 11 3.00 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 13 3.00 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 13 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 11 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT: 2 11 3.00 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 11 3.00 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 13 3.00 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 13 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 11 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT: 2 11 3.00 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: 2 11 3.00 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: 3 13 3.00 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: 3 13 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: 2 12 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT: 2 12 4.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 2 12 4.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 3 13 4.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: 3 13 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: 2 12 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT: 2 12 4.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 2 12 4.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 3 13 4.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: 3 13 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: 2 12 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT: 2 12 4.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 2 12 4.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 3 13 4.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: 3 13 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: 2 12 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT: 2 12 4.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: 2 12 4.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: 3 13 4.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 3 9 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 3 8 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: 3 8 1.00 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 3 8 1.00 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 3 9 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 3 8 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: 3 8 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 3 8 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 10 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 3 8 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: 3 8 1.00 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 3 8 1.00 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 10 1.00 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 3 9 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 3 8 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: 3 8 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 3 8 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 13 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 3 11 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 3 11 3.00 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 3.00 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 13 3.00 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 4 13 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 3 11 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 3 11 3.00 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 3.00 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 13 3.00 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 13 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 3 11 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 3 11 3.00 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 3.00 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 13 3.00 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 13 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 3 11 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 3 11 3.00 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 3 11 3.00 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 4 13 3.00 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 4 13 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 3 12 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 3 12 4.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 3 12 4.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 13 4.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 4 13 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 3 12 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 3 12 4.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 3 12 4.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 13 4.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 4 13 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 3 12 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 3 12 4.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 3 12 4.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 13 4.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 13 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 3 12 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 3 12 4.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 3 12 4.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 4 13 4.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: 2 9 2.00 * U ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0]
@@ -3722,172 +3722,172 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 6 0.50 * ldr z0, [x0]
# CHECK-NEXT: 1 6 0.50 * ldr z23, [x13, #255, mul vl]
# CHECK-NEXT: 1 6 0.50 * ldr z31, [sp, #-256, mul vl]
-# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, z1.d
-# CHECK-NEXT: 1 2 1.00 lsl z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.b, z1.b, z2.d
-# CHECK-NEXT: 1 2 1.00 lsl z0.d, p0/m, z0.d, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 lsl z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.h, p0/m, z0.h, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 lsl z0.h, p0/m, z0.h, z1.d
-# CHECK-NEXT: 1 2 1.00 lsl z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.h, z1.h, z2.d
-# CHECK-NEXT: 1 2 1.00 lsl z0.s, p0/m, z0.s, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 1.00 lsl z0.s, p0/m, z0.s, z1.d
-# CHECK-NEXT: 1 2 1.00 lsl z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 1.00 lsl z0.s, z1.s, z2.d
-# CHECK-NEXT: 1 2 1.00 lsl z31.b, p0/m, z31.b, #7
-# CHECK-NEXT: 1 2 1.00 lsl z31.b, z31.b, #7
-# CHECK-NEXT: 1 2 1.00 lsl z31.d, p0/m, z31.d, #63
-# CHECK-NEXT: 1 2 1.00 lsl z31.d, z31.d, #63
-# CHECK-NEXT: 1 2 1.00 lsl z31.h, p0/m, z31.h, #15
-# CHECK-NEXT: 1 2 1.00 lsl z31.h, z31.h, #15
-# CHECK-NEXT: 1 2 1.00 lsl z31.s, p0/m, z31.s, #31
-# CHECK-NEXT: 1 2 1.00 lsl z31.s, z31.s, #31
-# CHECK-NEXT: 1 2 1.00 lslr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 lslr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 lslr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 lslr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, z1.d
-# CHECK-NEXT: 1 2 1.00 lsr z0.b, z0.b, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.b, z1.b, z2.d
-# CHECK-NEXT: 1 2 1.00 lsr z0.d, p0/m, z0.d, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 lsr z0.d, z0.d, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.h, p0/m, z0.h, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 lsr z0.h, p0/m, z0.h, z1.d
-# CHECK-NEXT: 1 2 1.00 lsr z0.h, z0.h, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.h, z1.h, z2.d
-# CHECK-NEXT: 1 2 1.00 lsr z0.s, p0/m, z0.s, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 1.00 lsr z0.s, p0/m, z0.s, z1.d
-# CHECK-NEXT: 1 2 1.00 lsr z0.s, z0.s, #1
-# CHECK-NEXT: 1 2 1.00 lsr z0.s, z1.s, z2.d
-# CHECK-NEXT: 1 2 1.00 lsr z31.b, p0/m, z31.b, #8
-# CHECK-NEXT: 1 2 1.00 lsr z31.b, z31.b, #8
-# CHECK-NEXT: 1 2 1.00 lsr z31.d, p0/m, z31.d, #64
-# CHECK-NEXT: 1 2 1.00 lsr z31.d, z31.d, #64
-# CHECK-NEXT: 1 2 1.00 lsr z31.h, p0/m, z31.h, #16
-# CHECK-NEXT: 1 2 1.00 lsr z31.h, z31.h, #16
-# CHECK-NEXT: 1 2 1.00 lsr z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: 1 2 1.00 lsr z31.s, z31.s, #32
-# CHECK-NEXT: 1 2 1.00 lsrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 1.00 lsrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 1.00 lsrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 1.00 lsrr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 5 2.00 mad z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 5 2.00 mla z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 5 2.00 mls z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 1.00 lsl z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.b, z1.b, z2.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 lsl z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.h, z1.h, z2.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 1.00 lsl z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 2 2 1.00 lsl z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 1.00 lsl z0.s, z1.s, z2.d
+# CHECK-NEXT: 2 2 1.00 lsl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 2 2 1.00 lsl z31.b, z31.b, #7
+# CHECK-NEXT: 2 2 1.00 lsl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 2 2 1.00 lsl z31.d, z31.d, #63
+# CHECK-NEXT: 2 2 1.00 lsl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 2 2 1.00 lsl z31.h, z31.h, #15
+# CHECK-NEXT: 2 2 1.00 lsl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 2 2 1.00 lsl z31.s, z31.s, #31
+# CHECK-NEXT: 2 2 1.00 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 1.00 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 1.00 lsr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 1.00 lsr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 2 2 1.00 lsr z0.b, z0.b, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.b, z1.b, z2.d
+# CHECK-NEXT: 2 2 1.00 lsr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 lsr z0.d, z0.d, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 lsr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 2 2 1.00 lsr z0.h, z0.h, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.h, z1.h, z2.d
+# CHECK-NEXT: 2 2 1.00 lsr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 1.00 lsr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 2 2 1.00 lsr z0.s, z0.s, #1
+# CHECK-NEXT: 2 2 1.00 lsr z0.s, z1.s, z2.d
+# CHECK-NEXT: 2 2 1.00 lsr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 2 2 1.00 lsr z31.b, z31.b, #8
+# CHECK-NEXT: 2 2 1.00 lsr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 2 2 1.00 lsr z31.d, z31.d, #64
+# CHECK-NEXT: 2 2 1.00 lsr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 2 2 1.00 lsr z31.h, z31.h, #16
+# CHECK-NEXT: 2 2 1.00 lsr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 2 2 1.00 lsr z31.s, z31.s, #32
+# CHECK-NEXT: 2 2 1.00 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 1.00 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 1.00 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 5 2.00 mad z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 5 2.00 mla z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 5 2.00 mls z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1 1 1.00 mov p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 mov p0.b, p0/m, p0.b
# CHECK-NEXT: 1 1 1.00 mov p0.b, p0/z, p0.b
# CHECK-NEXT: 1 1 1.00 mov p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 mov p15.b, p15/m, p15.b
# CHECK-NEXT: 1 1 1.00 mov p15.b, p15/z, p15.b
-# CHECK-NEXT: 1 2 0.50 mov z0.b, #127
-# CHECK-NEXT: 1 2 0.50 mov z0.b, b0
-# CHECK-NEXT: 1 2 0.50 mov z0.b, p0/m, b0
-# CHECK-NEXT: 2 5 1.00 mov z0.b, p0/m, w0
-# CHECK-NEXT: 1 2 0.50 mov z0.b, p0/z, #127
+# CHECK-NEXT: 2 2 0.50 mov z0.b, #127
+# CHECK-NEXT: 2 2 0.50 mov z0.b, b0
+# CHECK-NEXT: 2 2 0.50 mov z0.b, p0/m, b0
+# CHECK-NEXT: 3 5 1.00 mov z0.b, p0/m, w0
+# CHECK-NEXT: 2 2 0.50 mov z0.b, p0/z, #127
# CHECK-NEXT: 1 3 1.00 mov z0.b, w0
-# CHECK-NEXT: 1 2 0.50 mov z0.d, #0
-# CHECK-NEXT: 1 2 0.50 mov z0.d, #0xe0000000000003ff
-# CHECK-NEXT: 1 2 0.50 mov z0.d, #0xffffffffffff7fff
-# CHECK-NEXT: 1 2 0.50 mov z0.d, #32768
-# CHECK-NEXT: 1 2 0.50 mov z0.d, d0
-# CHECK-NEXT: 1 2 0.50 mov z0.d, p0/m, d0
-# CHECK-NEXT: 2 5 1.00 mov z0.d, p0/m, x0
+# CHECK-NEXT: 2 2 0.50 mov z0.d, #0
+# CHECK-NEXT: 2 2 0.50 mov z0.d, #0xe0000000000003ff
+# CHECK-NEXT: 2 2 0.50 mov z0.d, #0xffffffffffff7fff
+# CHECK-NEXT: 2 2 0.50 mov z0.d, #32768
+# CHECK-NEXT: 2 2 0.50 mov z0.d, d0
+# CHECK-NEXT: 2 2 0.50 mov z0.d, p0/m, d0
+# CHECK-NEXT: 3 5 1.00 mov z0.d, p0/m, x0
# CHECK-NEXT: 1 3 1.00 mov z0.d, x0
-# CHECK-NEXT: 1 2 0.50 mov z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 mov z0.h, #-256
-# CHECK-NEXT: 1 2 0.50 mov z0.h, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z0.h, #0
-# CHECK-NEXT: 1 2 0.50 mov z0.h, #32512
-# CHECK-NEXT: 1 2 0.50 mov z0.h, #32767
-# CHECK-NEXT: 1 2 0.50 mov z0.h, h0
-# CHECK-NEXT: 1 2 0.50 mov z0.h, p0/m, h0
-# CHECK-NEXT: 2 5 1.00 mov z0.h, p0/m, w0
-# CHECK-NEXT: 1 2 0.50 mov z0.h, p0/z, #32512
+# CHECK-NEXT: 2 2 0.50 mov z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 mov z0.h, #-256
+# CHECK-NEXT: 2 2 0.50 mov z0.h, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z0.h, #0
+# CHECK-NEXT: 2 2 0.50 mov z0.h, #32512
+# CHECK-NEXT: 2 2 0.50 mov z0.h, #32767
+# CHECK-NEXT: 2 2 0.50 mov z0.h, h0
+# CHECK-NEXT: 2 2 0.50 mov z0.h, p0/m, h0
+# CHECK-NEXT: 3 5 1.00 mov z0.h, p0/m, w0
+# CHECK-NEXT: 2 2 0.50 mov z0.h, p0/z, #32512
# CHECK-NEXT: 1 3 1.00 mov z0.h, w0
-# CHECK-NEXT: 1 2 0.50 mov z0.q, q0
-# CHECK-NEXT: 1 2 0.50 mov z0.s, #0
-# CHECK-NEXT: 1 2 0.50 mov z0.s, #0xffff7fff
-# CHECK-NEXT: 1 2 0.50 mov z0.s, #32768
-# CHECK-NEXT: 1 2 0.50 mov z0.s, p0/m, s0
-# CHECK-NEXT: 2 5 1.00 mov z0.s, p0/m, w0
-# CHECK-NEXT: 1 2 0.50 mov z0.s, s0
+# CHECK-NEXT: 2 2 0.50 mov z0.q, q0
+# CHECK-NEXT: 2 2 0.50 mov z0.s, #0
+# CHECK-NEXT: 2 2 0.50 mov z0.s, #0xffff7fff
+# CHECK-NEXT: 2 2 0.50 mov z0.s, #32768
+# CHECK-NEXT: 2 2 0.50 mov z0.s, p0/m, s0
+# CHECK-NEXT: 3 5 1.00 mov z0.s, p0/m, w0
+# CHECK-NEXT: 2 2 0.50 mov z0.s, s0
# CHECK-NEXT: 1 3 1.00 mov z0.s, w0
-# CHECK-NEXT: 1 2 0.50 mov z21.d, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.d, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.d, #127
-# CHECK-NEXT: 1 2 0.50 mov z21.d, #32512
-# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #127
-# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #32512
-# CHECK-NEXT: 1 2 0.50 mov z21.d, p15/m, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.d, p15/m, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.h, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.h, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.h, #127
-# CHECK-NEXT: 1 2 0.50 mov z21.h, #32512
-# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #127
-# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #32512
-# CHECK-NEXT: 1 2 0.50 mov z21.h, p15/m, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.h, p15/m, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.s, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.s, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.s, #127
-# CHECK-NEXT: 1 2 0.50 mov z21.s, #32512
-# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #127
-# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #32512
-# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-128
-# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-32768
-# CHECK-NEXT: 1 2 0.50 mov z31.b, p15/m, z31.b
-# CHECK-NEXT: 1 2 0.50 mov z31.b, p7/m, b31
-# CHECK-NEXT: 1 2 0.50 movprfx z31, z6
-# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
+# CHECK-NEXT: 2 2 0.50 mov z21.d, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.d, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.d, #127
+# CHECK-NEXT: 2 2 0.50 mov z21.d, #32512
+# CHECK-NEXT: 2 2 0.50 mov z21.d, p0/z, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.d, p0/z, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.d, p0/z, #127
+# CHECK-NEXT: 2 2 0.50 mov z21.d, p0/z, #32512
+# CHECK-NEXT: 2 2 0.50 mov z21.d, p15/m, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.d, p15/m, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.h, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.h, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.h, #127
+# CHECK-NEXT: 2 2 0.50 mov z21.h, #32512
+# CHECK-NEXT: 2 2 0.50 mov z21.h, p0/z, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.h, p0/z, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.h, p0/z, #127
+# CHECK-NEXT: 2 2 0.50 mov z21.h, p0/z, #32512
+# CHECK-NEXT: 2 2 0.50 mov z21.h, p15/m, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.h, p15/m, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.s, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.s, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.s, #127
+# CHECK-NEXT: 2 2 0.50 mov z21.s, #32512
+# CHECK-NEXT: 2 2 0.50 mov z21.s, p0/z, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.s, p0/z, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z21.s, p0/z, #127
+# CHECK-NEXT: 2 2 0.50 mov z21.s, p0/z, #32512
+# CHECK-NEXT: 2 2 0.50 mov z21.s, p15/m, #-128
+# CHECK-NEXT: 2 2 0.50 mov z21.s, p15/m, #-32768
+# CHECK-NEXT: 2 2 0.50 mov z31.b, p15/m, z31.b
+# CHECK-NEXT: 2 2 0.50 mov z31.b, p7/m, b31
+# CHECK-NEXT: 2 2 0.50 movprfx z31, z6
+# CHECK-NEXT: 3 5 1.00 mov z31.b, p7/m, wsp
# CHECK-NEXT: 1 3 1.00 mov z31.b, wsp
-# CHECK-NEXT: 1 2 0.50 mov z31.b, z31.b[63]
-# CHECK-NEXT: 1 2 0.50 mov z31.d, p15/m, z31.d
-# CHECK-NEXT: 1 2 0.50 mov z31.d, p7/m, d31
-# CHECK-NEXT: 1 2 0.50 movprfx z31.d, p7/z, z6.d
-# CHECK-NEXT: 2 5 1.00 mov z31.d, p7/m, sp
+# CHECK-NEXT: 2 2 0.50 mov z31.b, z31.b[63]
+# CHECK-NEXT: 2 2 0.50 mov z31.d, p15/m, z31.d
+# CHECK-NEXT: 2 2 0.50 mov z31.d, p7/m, d31
+# CHECK-NEXT: 2 2 0.50 movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: 3 5 1.00 mov z31.d, p7/m, sp
# CHECK-NEXT: 1 3 1.00 mov z31.d, sp
-# CHECK-NEXT: 1 2 0.50 mov z31.d, z0.d
-# CHECK-NEXT: 1 2 0.50 mov z31.d, z31.d[7]
-# CHECK-NEXT: 1 2 0.50 mov z31.h, p15/m, z31.h
-# CHECK-NEXT: 1 2 0.50 mov z31.h, p7/m, h31
-# CHECK-NEXT: 2 5 1.00 mov z31.h, p7/m, wsp
+# CHECK-NEXT: 2 2 0.50 mov z31.d, z0.d
+# CHECK-NEXT: 2 2 0.50 mov z31.d, z31.d[7]
+# CHECK-NEXT: 2 2 0.50 mov z31.h, p15/m, z31.h
+# CHECK-NEXT: 2 2 0.50 mov z31.h, p7/m, h31
+# CHECK-NEXT: 3 5 1.00 mov z31.h, p7/m, wsp
# CHECK-NEXT: 1 3 1.00 mov z31.h, wsp
-# CHECK-NEXT: 1 2 0.50 mov z31.h, z31.h[31]
-# CHECK-NEXT: 1 2 0.50 mov z31.s, p15/m, z31.s
-# CHECK-NEXT: 1 2 0.50 mov z31.s, p7/m, s31
-# CHECK-NEXT: 2 5 1.00 mov z31.s, p7/m, wsp
+# CHECK-NEXT: 2 2 0.50 mov z31.h, z31.h[31]
+# CHECK-NEXT: 2 2 0.50 mov z31.s, p15/m, z31.s
+# CHECK-NEXT: 2 2 0.50 mov z31.s, p7/m, s31
+# CHECK-NEXT: 3 5 1.00 mov z31.s, p7/m, wsp
# CHECK-NEXT: 1 3 1.00 mov z31.s, wsp
-# CHECK-NEXT: 1 2 0.50 mov z31.s, z31.s[15]
-# CHECK-NEXT: 1 2 0.50 mov z5.b, #-1
-# CHECK-NEXT: 1 2 0.50 mov z5.b, #-128
-# CHECK-NEXT: 1 2 0.50 mov z5.b, #127
-# CHECK-NEXT: 1 2 0.50 mov z5.b, p0/z, #-1
-# CHECK-NEXT: 1 2 0.50 mov z5.b, p0/z, #-128
-# CHECK-NEXT: 1 2 0.50 mov z5.b, p0/z, #127
-# CHECK-NEXT: 1 2 0.50 mov z5.b, p15/m, #-128
-# CHECK-NEXT: 1 2 0.50 mov z5.d, #-6
-# CHECK-NEXT: 1 2 0.50 mov z5.h, #-6
-# CHECK-NEXT: 1 2 0.50 mov z5.q, z17.q[3]
-# CHECK-NEXT: 1 2 0.50 mov z5.s, #-6
+# CHECK-NEXT: 2 2 0.50 mov z31.s, z31.s[15]
+# CHECK-NEXT: 2 2 0.50 mov z5.b, #-1
+# CHECK-NEXT: 2 2 0.50 mov z5.b, #-128
+# CHECK-NEXT: 2 2 0.50 mov z5.b, #127
+# CHECK-NEXT: 2 2 0.50 mov z5.b, p0/z, #-1
+# CHECK-NEXT: 2 2 0.50 mov z5.b, p0/z, #-128
+# CHECK-NEXT: 2 2 0.50 mov z5.b, p0/z, #127
+# CHECK-NEXT: 2 2 0.50 mov z5.b, p15/m, #-128
+# CHECK-NEXT: 2 2 0.50 mov z5.d, #-6
+# CHECK-NEXT: 2 2 0.50 mov z5.h, #-6
+# CHECK-NEXT: 2 2 0.50 mov z5.q, z17.q[3]
+# CHECK-NEXT: 2 2 0.50 mov z5.s, #-6
# CHECK-NEXT: 1 2 2.00 movs p0.b, p0.b
# CHECK-NEXT: 1 2 2.00 movs p0.b, p0/z, p0.b
# CHECK-NEXT: 1 2 2.00 movs p15.b, p15.b
@@ -3898,44 +3898,44 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 1 0.07 U mrs x3, ZCR_EL2
# CHECK-NEXT: 1 1 0.07 U mrs x3, ZCR_EL3
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL1, x3
-# CHECK-NEXT: 1 5 2.00 msb z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 5 2.00 msb z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL12, x3
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL2, x3
# CHECK-NEXT: 1 1 0.07 U msr ZCR_EL3, x3
-# CHECK-NEXT: 1 4 1.00 mul z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: 1 5 2.00 mul z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 4 1.00 mul z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 4 1.00 mul z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #-128
-# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #127
-# CHECK-NEXT: 1 5 2.00 mul z31.d, z31.d, #-128
-# CHECK-NEXT: 1 5 2.00 mul z31.d, z31.d, #127
-# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #-128
-# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #127
-# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #-128
-# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #127
+# CHECK-NEXT: 2 4 1.00 mul z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 2 5 2.00 mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 4 1.00 mul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 mul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 4 1.00 mul z31.b, z31.b, #-128
+# CHECK-NEXT: 2 4 1.00 mul z31.b, z31.b, #127
+# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, #-128
+# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, #127
+# CHECK-NEXT: 2 4 1.00 mul z31.h, z31.h, #-128
+# CHECK-NEXT: 2 4 1.00 mul z31.h, z31.h, #127
+# CHECK-NEXT: 2 4 1.00 mul z31.s, z31.s, #-128
+# CHECK-NEXT: 2 4 1.00 mul z31.s, z31.s, #127
# CHECK-NEXT: 1 1 1.00 nand p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 nand p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 2.00 nands p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 2 2.00 nands p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: 1 2 0.50 neg z0.b, p0/m, z0.b
-# CHECK-NEXT: 1 2 0.50 neg z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 0.50 neg z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 2 0.50 neg z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 2 0.50 neg z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 neg z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 neg z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 neg z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 neg z0.b, p0/m, z0.b
+# CHECK-NEXT: 2 2 0.50 neg z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 0.50 neg z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 2 0.50 neg z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 2 0.50 neg z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 neg z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 neg z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 neg z31.s, p7/m, z31.s
# CHECK-NEXT: 1 1 1.00 nor p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 1 1.00 nor p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 2 2.00 nors p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 2 2.00 nors p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 not p0.b, p0/z, p0.b
# CHECK-NEXT: 1 1 1.00 not p15.b, p15/z, p15.b
-# CHECK-NEXT: 1 2 0.50 not z31.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 not z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 not z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 not z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 not z31.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 not z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 not z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 not z31.s, p7/m, z31.s
# CHECK-NEXT: 1 2 2.00 nots p0.b, p0/z, p0.b
# CHECK-NEXT: 1 2 2.00 nots p15.b, p15/z, p15.b
# CHECK-NEXT: 1 1 1.00 orn p0.b, p0/z, p0.b, p0.b
@@ -3943,24 +3943,24 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 2.00 orns p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: 1 2 2.00 orns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: 1 1 1.00 orr p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0x6
-# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: 1 2 0.50 orr z0.s, z0.s, #0x6
-# CHECK-NEXT: 1 2 0.50 orr z0.s, z0.s, #0xfffffff9
-# CHECK-NEXT: 1 2 0.50 orr z23.d, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 orr z23.h, z23.h, #0x6
-# CHECK-NEXT: 1 2 0.50 orr z23.h, z23.h, #0xfff9
-# CHECK-NEXT: 1 2 0.50 orr z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 orr z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 orr z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 orr z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0x6
-# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0xf9
+# CHECK-NEXT: 2 2 0.50 orr z0.d, z0.d, #0x6
+# CHECK-NEXT: 2 2 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 2 2 0.50 orr z0.s, z0.s, #0x6
+# CHECK-NEXT: 2 2 0.50 orr z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 2 2 0.50 orr z23.d, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 orr z23.h, z23.h, #0x6
+# CHECK-NEXT: 2 2 0.50 orr z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 2 2 0.50 orr z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 orr z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 orr z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 orr z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 orr z5.b, z5.b, #0x6
+# CHECK-NEXT: 2 2 0.50 orr z5.b, z5.b, #0xf9
# CHECK-NEXT: 1 2 2.00 orrs p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: 1 12 2.00 orv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 orv d0, p7, z31.d
-# CHECK-NEXT: 1 12 2.00 orv h0, p7, z31.h
-# CHECK-NEXT: 1 12 2.00 orv s0, p7, z31.s
+# CHECK-NEXT: 2 12 2.00 orv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 orv d0, p7, z31.d
+# CHECK-NEXT: 2 12 2.00 orv h0, p7, z31.h
+# CHECK-NEXT: 2 12 2.00 orv s0, p7, z31.s
# CHECK-NEXT: 1 2 1.00 pfalse p15.b
# CHECK-NEXT: 1 2 1.00 pfirst p0.b, p15, p0.b
# CHECK-NEXT: 1 2 1.00 pfirst p15.b, p15, p15.b
@@ -4053,10 +4053,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 punpkhi p15.h, p15.b
# CHECK-NEXT: 1 2 1.00 punpklo p0.h, p0.b
# CHECK-NEXT: 1 2 1.00 punpklo p15.h, p15.b
-# CHECK-NEXT: 1 2 0.50 rbit z0.b, p7/m, z31.b
-# CHECK-NEXT: 1 2 0.50 rbit z0.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 rbit z0.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 rbit z0.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 rbit z0.b, p7/m, z31.b
+# CHECK-NEXT: 2 2 0.50 rbit z0.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 rbit z0.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 rbit z0.s, p7/m, z31.s
# CHECK-NEXT: 1 2 1.00 * U rdffr p0.b
# CHECK-NEXT: 1 3 2.00 * U rdffr p0.b, p0/z
# CHECK-NEXT: 1 2 1.00 * U rdffr p15.b
@@ -4067,95 +4067,95 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 rdvl x21, #-32
# CHECK-NEXT: 1 2 1.00 rdvl x23, #31
# CHECK-NEXT: 1 2 1.00 rdvl xzr, #-1
-# CHECK-NEXT: 1 2 0.50 rev z0.b, z31.b
-# CHECK-NEXT: 1 2 0.50 rev z0.d, z31.d
-# CHECK-NEXT: 1 2 0.50 rev z0.h, z31.h
-# CHECK-NEXT: 1 2 0.50 rev z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 revb z0.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 revb z0.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 0.50 revb z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 0.50 revh z0.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 revh z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 0.50 revw z0.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 sabd z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 sabd z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 sabd z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 sabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 14 2.00 saddv d0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 saddv d0, p7, z31.h
-# CHECK-NEXT: 1 10 2.00 saddv d0, p7, z31.s
-# CHECK-NEXT: 1 3 1.00 scvtf z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 6 4.00 scvtf z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 4 2.00 scvtf z0.h, p0/m, z0.s
-# CHECK-NEXT: 1 3 1.00 scvtf z0.s, p0/m, z0.d
-# CHECK-NEXT: 1 4 2.00 scvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 20 20.00 sdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 11.00 sdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 20 20.00 sdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 11.00 sdivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z31.h
-# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z31.b
-# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z7.b[3]
-# CHECK-NEXT: 1 2 0.50 sel z23.b, p11, z13.b, z8.b
-# CHECK-NEXT: 1 2 0.50 sel z23.d, p11, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 sel z23.h, p11, z13.h, z8.h
-# CHECK-NEXT: 1 2 0.50 sel z23.s, p11, z13.s, z8.s
+# CHECK-NEXT: 2 2 0.50 rev z0.b, z31.b
+# CHECK-NEXT: 2 2 0.50 rev z0.d, z31.d
+# CHECK-NEXT: 2 2 0.50 rev z0.h, z31.h
+# CHECK-NEXT: 2 2 0.50 rev z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 revb z0.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 revb z0.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 0.50 revb z0.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 revh z0.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 revh z0.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 0.50 revw z0.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 sabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 sabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 sabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 sabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 14 2.00 saddv d0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 saddv d0, p7, z31.h
+# CHECK-NEXT: 2 10 2.00 saddv d0, p7, z31.s
+# CHECK-NEXT: 2 3 1.00 scvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 6 4.00 scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 2 3 1.00 scvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 20 20.00 sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 12 11.00 sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 20 20.00 sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 12 11.00 sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 4 1.00 sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 2 4 1.00 sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: 2 3 0.50 sdot z0.s, z1.b, z31.b
+# CHECK-NEXT: 2 3 0.50 sdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 2 2 0.50 sel z23.b, p11, z13.b, z8.b
+# CHECK-NEXT: 2 2 0.50 sel z23.d, p11, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 sel z23.h, p11, z13.h, z8.h
+# CHECK-NEXT: 2 2 0.50 sel z23.s, p11, z13.s, z8.s
# CHECK-NEXT: 1 2 1.00 * U setffr
-# CHECK-NEXT: 1 2 0.50 smax z0.b, z0.b, #-128
-# CHECK-NEXT: 1 2 0.50 smax z0.d, z0.d, #-128
-# CHECK-NEXT: 1 2 0.50 smax z0.h, z0.h, #-128
-# CHECK-NEXT: 1 2 0.50 smax z0.s, z0.s, #-128
-# CHECK-NEXT: 1 2 0.50 smax z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 smax z31.b, z31.b, #127
-# CHECK-NEXT: 1 2 0.50 smax z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 smax z31.d, z31.d, #127
-# CHECK-NEXT: 1 2 0.50 smax z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 smax z31.h, z31.h, #127
-# CHECK-NEXT: 1 2 0.50 smax z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 smax z31.s, z31.s, #127
-# CHECK-NEXT: 1 14 2.00 smaxv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 smaxv h0, p7, z31.h
-# CHECK-NEXT: 1 10 2.00 smaxv s0, p7, z31.s
-# CHECK-NEXT: 1 2 0.50 smin z0.b, z0.b, #-128
-# CHECK-NEXT: 1 2 0.50 smin z0.d, z0.d, #-128
-# CHECK-NEXT: 1 2 0.50 smin z0.h, z0.h, #-128
-# CHECK-NEXT: 1 2 0.50 smin z0.s, z0.s, #-128
-# CHECK-NEXT: 1 2 0.50 smin z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 smin z31.b, z31.b, #127
-# CHECK-NEXT: 1 2 0.50 smin z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 smin z31.d, z31.d, #127
-# CHECK-NEXT: 1 2 0.50 smin z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 smin z31.h, z31.h, #127
-# CHECK-NEXT: 1 2 0.50 smin z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 smin z31.s, z31.s, #127
-# CHECK-NEXT: 1 14 2.00 sminv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 sminv h0, p7, z31.h
-# CHECK-NEXT: 1 10 2.00 sminv s0, p7, z31.s
+# CHECK-NEXT: 2 2 0.50 smax z0.b, z0.b, #-128
+# CHECK-NEXT: 2 2 0.50 smax z0.d, z0.d, #-128
+# CHECK-NEXT: 2 2 0.50 smax z0.h, z0.h, #-128
+# CHECK-NEXT: 2 2 0.50 smax z0.s, z0.s, #-128
+# CHECK-NEXT: 2 2 0.50 smax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 smax z31.b, z31.b, #127
+# CHECK-NEXT: 2 2 0.50 smax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 smax z31.d, z31.d, #127
+# CHECK-NEXT: 2 2 0.50 smax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 smax z31.h, z31.h, #127
+# CHECK-NEXT: 2 2 0.50 smax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 smax z31.s, z31.s, #127
+# CHECK-NEXT: 2 14 2.00 smaxv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 smaxv h0, p7, z31.h
+# CHECK-NEXT: 2 10 2.00 smaxv s0, p7, z31.s
+# CHECK-NEXT: 2 2 0.50 smin z0.b, z0.b, #-128
+# CHECK-NEXT: 2 2 0.50 smin z0.d, z0.d, #-128
+# CHECK-NEXT: 2 2 0.50 smin z0.h, z0.h, #-128
+# CHECK-NEXT: 2 2 0.50 smin z0.s, z0.s, #-128
+# CHECK-NEXT: 2 2 0.50 smin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 smin z31.b, z31.b, #127
+# CHECK-NEXT: 2 2 0.50 smin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 smin z31.d, z31.d, #127
+# CHECK-NEXT: 2 2 0.50 smin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 smin z31.h, z31.h, #127
+# CHECK-NEXT: 2 2 0.50 smin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 smin z31.s, z31.s, #127
+# CHECK-NEXT: 2 14 2.00 sminv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 sminv h0, p7, z31.h
+# CHECK-NEXT: 2 10 2.00 sminv s0, p7, z31.s
# CHECK-NEXT: 1 3 0.50 smmla z0.s, z1.b, z2.b
-# CHECK-NEXT: 1 4 1.00 smulh z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: 1 5 2.00 smulh z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 4 1.00 smulh z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 4 1.00 smulh z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 3 1.00 splice z31.b, p7, z31.b, z31.b
-# CHECK-NEXT: 1 3 1.00 splice z31.d, p7, z31.d, z31.d
-# CHECK-NEXT: 1 3 1.00 splice z31.h, p7, z31.h, z31.h
-# CHECK-NEXT: 1 3 1.00 splice z31.s, p7, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 sqadd z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 sqadd z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 sqadd z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 sqadd z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sqadd z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 sqadd z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 sqadd z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sqadd z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 sqadd z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 sqadd z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sqadd z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 sqadd z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 sqadd z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 sqadd z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 sqadd z31.s, z31.s, #65280
+# CHECK-NEXT: 2 4 1.00 smulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 2 5 2.00 smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 4 1.00 smulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 smulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 3 1.00 splice z31.b, p7, z31.b, z31.b
+# CHECK-NEXT: 2 3 1.00 splice z31.d, p7, z31.d, z31.d
+# CHECK-NEXT: 2 3 1.00 splice z31.h, p7, z31.h, z31.h
+# CHECK-NEXT: 2 3 1.00 splice z31.s, p7, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 sqadd z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 sqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 sqadd z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 sqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 sqadd z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 sqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 sqadd z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 sqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 sqadd z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 sqadd z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 sqadd z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 sqadd z31.s, z31.s, #65280
# CHECK-NEXT: 1 2 1.00 sqdecb x0
# CHECK-NEXT: 1 2 1.00 sqdecb x0, #14
# CHECK-NEXT: 1 2 1.00 sqdecb x0, all, mul #16
@@ -4174,10 +4174,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecd x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqdecd z0.d
-# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, pow2
-# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 sqdecd z0.d
+# CHECK-NEXT: 2 2 0.50 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: 2 2 0.50 sqdecd z0.d, pow2
+# CHECK-NEXT: 2 2 0.50 sqdecd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech x0
# CHECK-NEXT: 1 2 1.00 sqdech x0, #14
# CHECK-NEXT: 1 2 1.00 sqdech x0, all, mul #16
@@ -4187,10 +4187,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqdech x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqdech z0.h
-# CHECK-NEXT: 1 2 0.50 sqdech z0.h, all, mul #16
-# CHECK-NEXT: 1 2 0.50 sqdech z0.h, pow2
-# CHECK-NEXT: 1 2 0.50 sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 sqdech z0.h
+# CHECK-NEXT: 2 2 0.50 sqdech z0.h, all, mul #16
+# CHECK-NEXT: 2 2 0.50 sqdech z0.h, pow2
+# CHECK-NEXT: 2 2 0.50 sqdech z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecp x0, p0.b
# CHECK-NEXT: 1 2 1.00 sqdecp x0, p0.d
# CHECK-NEXT: 1 2 1.00 sqdecp x0, p0.h
@@ -4199,9 +4199,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecp xzr, p15.d, wzr
# CHECK-NEXT: 1 2 1.00 sqdecp xzr, p15.h, wzr
# CHECK-NEXT: 1 2 1.00 sqdecp xzr, p15.s, wzr
-# CHECK-NEXT: 2 7 2.00 sqdecp z0.d, p0.d
-# CHECK-NEXT: 2 7 2.00 sqdecp z0.h, p0.h
-# CHECK-NEXT: 2 7 2.00 sqdecp z0.s, p0.s
+# CHECK-NEXT: 3 7 2.00 sqdecp z0.d, p0.d
+# CHECK-NEXT: 3 7 2.00 sqdecp z0.h, p0.h
+# CHECK-NEXT: 3 7 2.00 sqdecp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 sqdecw x0
# CHECK-NEXT: 1 2 1.00 sqdecw x0, #14
# CHECK-NEXT: 1 2 1.00 sqdecw x0, all, mul #16
@@ -4211,10 +4211,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqdecw x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqdecw z0.s
-# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, pow2
-# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 sqdecw z0.s
+# CHECK-NEXT: 2 2 0.50 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: 2 2 0.50 sqdecw z0.s, pow2
+# CHECK-NEXT: 2 2 0.50 sqdecw z0.s, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincb x0
# CHECK-NEXT: 1 2 1.00 sqincb x0, #14
# CHECK-NEXT: 1 2 1.00 sqincb x0, all, mul #16
@@ -4233,10 +4233,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqincd x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqincd z0.d
-# CHECK-NEXT: 1 2 0.50 sqincd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 0.50 sqincd z0.d, pow2
-# CHECK-NEXT: 1 2 0.50 sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 sqincd z0.d
+# CHECK-NEXT: 2 2 0.50 sqincd z0.d, all, mul #16
+# CHECK-NEXT: 2 2 0.50 sqincd z0.d, pow2
+# CHECK-NEXT: 2 2 0.50 sqincd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch x0
# CHECK-NEXT: 1 2 1.00 sqinch x0, #14
# CHECK-NEXT: 1 2 1.00 sqinch x0, all, mul #16
@@ -4246,10 +4246,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqinch x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqinch z0.h
-# CHECK-NEXT: 1 2 0.50 sqinch z0.h, all, mul #16
-# CHECK-NEXT: 1 2 0.50 sqinch z0.h, pow2
-# CHECK-NEXT: 1 2 0.50 sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 sqinch z0.h
+# CHECK-NEXT: 2 2 0.50 sqinch z0.h, all, mul #16
+# CHECK-NEXT: 2 2 0.50 sqinch z0.h, pow2
+# CHECK-NEXT: 2 2 0.50 sqinch z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 sqincp x0, p0.b
# CHECK-NEXT: 1 2 1.00 sqincp x0, p0.d
# CHECK-NEXT: 1 2 1.00 sqincp x0, p0.h
@@ -4258,9 +4258,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincp xzr, p15.d, wzr
# CHECK-NEXT: 1 2 1.00 sqincp xzr, p15.h, wzr
# CHECK-NEXT: 1 2 1.00 sqincp xzr, p15.s, wzr
-# CHECK-NEXT: 2 7 2.00 sqincp z0.d, p0.d
-# CHECK-NEXT: 2 7 2.00 sqincp z0.h, p0.h
-# CHECK-NEXT: 2 7 2.00 sqincp z0.s, p0.s
+# CHECK-NEXT: 3 7 2.00 sqincp z0.d, p0.d
+# CHECK-NEXT: 3 7 2.00 sqincp z0.h, p0.h
+# CHECK-NEXT: 3 7 2.00 sqincp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 sqincw x0
# CHECK-NEXT: 1 2 1.00 sqincw x0, #14
# CHECK-NEXT: 1 2 1.00 sqincw x0, all, mul #16
@@ -4270,25 +4270,25 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, pow2
# CHECK-NEXT: 1 2 1.00 sqincw x0, w0, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqincw z0.s
-# CHECK-NEXT: 1 2 0.50 sqincw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 0.50 sqincw z0.s, pow2
-# CHECK-NEXT: 1 2 0.50 sqincw z0.s, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 sqsub z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 sqsub z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 sqsub z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 sqsub z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sqsub z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 sqsub z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 sqsub z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sqsub z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 sqsub z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 sqsub z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 sqsub z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 sqsub z31.s, z31.s, #65280
+# CHECK-NEXT: 2 2 0.50 sqincw z0.s
+# CHECK-NEXT: 2 2 0.50 sqincw z0.s, all, mul #16
+# CHECK-NEXT: 2 2 0.50 sqincw z0.s, pow2
+# CHECK-NEXT: 2 2 0.50 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 sqsub z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 sqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 sqsub z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 sqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 sqsub z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 sqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 sqsub z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 sqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 sqsub z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 sqsub z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 sqsub z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 sqsub z31.s, z31.s, #65280
# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0]
# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0, x0]
@@ -4456,161 +4456,161 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 2 0.50 * str z0, [x0]
# CHECK-NEXT: 2 2 0.50 * str z21, [x10, #-256, mul vl]
# CHECK-NEXT: 2 2 0.50 * str z31, [sp, #255, mul vl]
-# CHECK-NEXT: 1 2 0.50 sub z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 sub z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 sub z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 sub z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 sub z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 sub z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sub z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 sub z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 sub z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 sub z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sub z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 sub z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 sub z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 sub z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 sub z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 sub z21.b, p5/m, z21.b, z10.b
-# CHECK-NEXT: 1 2 0.50 sub z21.b, z10.b, z21.b
-# CHECK-NEXT: 1 2 0.50 sub z21.d, p5/m, z21.d, z10.d
-# CHECK-NEXT: 1 2 0.50 sub z21.d, z10.d, z21.d
-# CHECK-NEXT: 1 2 0.50 sub z21.h, p5/m, z21.h, z10.h
-# CHECK-NEXT: 1 2 0.50 sub z21.h, z10.h, z21.h
-# CHECK-NEXT: 1 2 0.50 sub z21.s, p5/m, z21.s, z10.s
-# CHECK-NEXT: 1 2 0.50 sub z21.s, z10.s, z21.s
-# CHECK-NEXT: 1 2 0.50 sub z23.b, p3/m, z23.b, z13.b
-# CHECK-NEXT: 1 2 0.50 sub z23.b, z13.b, z8.b
-# CHECK-NEXT: 1 2 0.50 sub z23.d, p3/m, z23.d, z13.d
-# CHECK-NEXT: 1 2 0.50 sub z23.d, z13.d, z8.d
-# CHECK-NEXT: 1 2 0.50 sub z23.h, p3/m, z23.h, z13.h
-# CHECK-NEXT: 1 2 0.50 sub z23.h, z13.h, z8.h
-# CHECK-NEXT: 1 2 0.50 sub z23.s, p3/m, z23.s, z13.s
-# CHECK-NEXT: 1 2 0.50 sub z23.s, z13.s, z8.s
-# CHECK-NEXT: 1 2 0.50 sub z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 sub z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 sub z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 sub z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 sub z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 sub z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 sub z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 sub z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 sub z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 sub z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 sub z31.s, z31.s, #65280
-# CHECK-NEXT: 1 2 0.50 sub z31.s, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 subr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 subr z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 subr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 subr z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 subr z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 subr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 subr z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 subr z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 subr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 subr z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 subr z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 subr z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 subr z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 subr z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 subr z31.s, z31.s, #65280
+# CHECK-NEXT: 2 2 0.50 sub z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 sub z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 sub z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 sub z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 sub z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 sub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sub z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 sub z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 sub z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 sub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sub z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 sub z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 sub z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 sub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 sub z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 sub z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: 2 2 0.50 sub z21.b, z10.b, z21.b
+# CHECK-NEXT: 2 2 0.50 sub z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: 2 2 0.50 sub z21.d, z10.d, z21.d
+# CHECK-NEXT: 2 2 0.50 sub z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: 2 2 0.50 sub z21.h, z10.h, z21.h
+# CHECK-NEXT: 2 2 0.50 sub z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: 2 2 0.50 sub z21.s, z10.s, z21.s
+# CHECK-NEXT: 2 2 0.50 sub z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: 2 2 0.50 sub z23.b, z13.b, z8.b
+# CHECK-NEXT: 2 2 0.50 sub z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: 2 2 0.50 sub z23.d, z13.d, z8.d
+# CHECK-NEXT: 2 2 0.50 sub z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: 2 2 0.50 sub z23.h, z13.h, z8.h
+# CHECK-NEXT: 2 2 0.50 sub z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: 2 2 0.50 sub z23.s, z13.s, z8.s
+# CHECK-NEXT: 2 2 0.50 sub z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 sub z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 sub z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 sub z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 sub z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 sub z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 sub z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 sub z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 sub z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 sub z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 sub z31.s, z31.s, #65280
+# CHECK-NEXT: 2 2 0.50 sub z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 subr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 subr z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 subr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 subr z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 subr z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 subr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 subr z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 subr z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 subr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 subr z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 subr z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 subr z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 subr z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 subr z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 subr z31.s, z31.s, #65280
# CHECK-NEXT: 1 3 0.50 sudot z0.s, z1.b, z7.b[3]
-# CHECK-NEXT: 1 2 0.50 sunpkhi z31.d, z31.s
-# CHECK-NEXT: 1 2 0.50 sunpkhi z31.h, z31.b
-# CHECK-NEXT: 1 2 0.50 sunpkhi z31.s, z31.h
-# CHECK-NEXT: 1 2 0.50 sunpklo z31.d, z31.s
-# CHECK-NEXT: 1 2 0.50 sunpklo z31.h, z31.b
-# CHECK-NEXT: 1 2 0.50 sunpklo z31.s, z31.h
-# CHECK-NEXT: 1 2 1.00 sxtb z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 1.00 sxtb z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 2 1.00 sxtb z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 2 1.00 sxtb z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 1.00 sxtb z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 1.00 sxtb z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 sxth z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 1.00 sxth z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 2 1.00 sxth z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 1.00 sxth z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 sxtw z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 1.00 sxtw z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 0.50 tbl z31.b, { z31.b }, z31.b
-# CHECK-NEXT: 1 2 0.50 tbl z31.d, { z31.d }, z31.d
-# CHECK-NEXT: 1 2 0.50 tbl z31.h, { z31.h }, z31.h
-# CHECK-NEXT: 1 2 0.50 tbl z31.s, { z31.s }, z31.s
+# CHECK-NEXT: 2 2 0.50 sunpkhi z31.d, z31.s
+# CHECK-NEXT: 2 2 0.50 sunpkhi z31.h, z31.b
+# CHECK-NEXT: 2 2 0.50 sunpkhi z31.s, z31.h
+# CHECK-NEXT: 2 2 0.50 sunpklo z31.d, z31.s
+# CHECK-NEXT: 2 2 0.50 sunpklo z31.h, z31.b
+# CHECK-NEXT: 2 2 0.50 sunpklo z31.s, z31.h
+# CHECK-NEXT: 2 2 1.00 sxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 1.00 sxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 2 1.00 sxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 2 1.00 sxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 1.00 sxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 1.00 sxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 1.00 sxth z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 1.00 sxth z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 2 1.00 sxth z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 1.00 sxth z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 1.00 sxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 1.00 sxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 tbl z31.b, { z31.b }, z31.b
+# CHECK-NEXT: 2 2 0.50 tbl z31.d, { z31.d }, z31.d
+# CHECK-NEXT: 2 2 0.50 tbl z31.h, { z31.h }, z31.h
+# CHECK-NEXT: 2 2 0.50 tbl z31.s, { z31.s }, z31.s
# CHECK-NEXT: 1 2 1.00 trn1 p15.b, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 trn1 p15.d, p15.d, p15.d
# CHECK-NEXT: 1 2 1.00 trn1 p15.h, p15.h, p15.h
# CHECK-NEXT: 1 2 1.00 trn1 p15.s, p15.s, p15.s
-# CHECK-NEXT: 1 2 0.50 trn1 z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 trn1 z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 trn1 z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 trn1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 trn1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 trn1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 trn1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 trn1 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 trn2 p15.b, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 trn2 p15.d, p15.d, p15.d
# CHECK-NEXT: 1 2 1.00 trn2 p15.h, p15.h, p15.h
# CHECK-NEXT: 1 2 1.00 trn2 p15.s, p15.s, p15.s
-# CHECK-NEXT: 1 2 0.50 trn2 z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 trn2 z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 trn2 z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 trn2 z31.s, z31.s, z31.s
-# CHECK-NEXT: 1 2 0.50 uabd z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 uabd z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 uabd z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 uabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 14 2.00 uaddv d0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 uaddv d0, p7, z31.h
-# CHECK-NEXT: 1 10 2.00 uaddv d0, p7, z31.s
-# CHECK-NEXT: 1 3 1.00 ucvtf z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 6 4.00 ucvtf z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 4 2.00 ucvtf z0.h, p0/m, z0.s
-# CHECK-NEXT: 1 3 1.00 ucvtf z0.s, p0/m, z0.d
-# CHECK-NEXT: 1 4 2.00 ucvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 20 20.00 udiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 11.00 udiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 20 20.00 udivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 12 11.00 udivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z31.h
-# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z31.b
-# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z7.b[3]
-# CHECK-NEXT: 1 2 0.50 umax z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 umax z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 umax z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 umax z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 umax z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 umax z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 14 2.00 umaxv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 umaxv h0, p7, z31.h
-# CHECK-NEXT: 1 10 2.00 umaxv s0, p7, z31.s
-# CHECK-NEXT: 1 2 0.50 umin z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 umin z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 umin z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 umin z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 umin z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 umin z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: 1 14 2.00 uminv b0, p7, z31.b
-# CHECK-NEXT: 1 12 2.00 uminv h0, p7, z31.h
-# CHECK-NEXT: 1 10 2.00 uminv s0, p7, z31.s
+# CHECK-NEXT: 2 2 0.50 trn2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 trn2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 trn2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 trn2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 uabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 uabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 uabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 uabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 14 2.00 uaddv d0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 uaddv d0, p7, z31.h
+# CHECK-NEXT: 2 10 2.00 uaddv d0, p7, z31.s
+# CHECK-NEXT: 2 3 1.00 ucvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 6 4.00 ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 2 3 1.00 ucvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 20 20.00 udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 12 11.00 udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 20 20.00 udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 12 11.00 udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 4 1.00 udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 2 4 1.00 udot z0.d, z1.h, z31.h
+# CHECK-NEXT: 2 3 0.50 udot z0.s, z1.b, z31.b
+# CHECK-NEXT: 2 3 0.50 udot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 2 2 0.50 umax z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 umax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 umax z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 umax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 umax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 umax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 14 2.00 umaxv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 umaxv h0, p7, z31.h
+# CHECK-NEXT: 2 10 2.00 umaxv s0, p7, z31.s
+# CHECK-NEXT: 2 2 0.50 umin z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 umin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 umin z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 umin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 umin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 umin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 2 14 2.00 uminv b0, p7, z31.b
+# CHECK-NEXT: 2 12 2.00 uminv h0, p7, z31.h
+# CHECK-NEXT: 2 10 2.00 uminv s0, p7, z31.s
# CHECK-NEXT: 1 3 0.50 ummla z0.s, z1.b, z2.b
-# CHECK-NEXT: 1 4 1.00 umulh z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: 1 5 2.00 umulh z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: 1 4 1.00 umulh z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: 1 4 1.00 umulh z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: 1 2 0.50 uqadd z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 uqadd z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 uqadd z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 uqadd z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 uqadd z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 uqadd z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 uqadd z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 uqadd z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 uqadd z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 uqadd z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 uqadd z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 uqadd z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 uqadd z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 uqadd z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 uqadd z31.s, z31.s, #65280
+# CHECK-NEXT: 2 4 1.00 umulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 2 5 2.00 umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 4 1.00 umulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 umulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 2 2 0.50 uqadd z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 uqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 uqadd z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 uqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 uqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 uqadd z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 uqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 uqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 uqadd z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 uqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 uqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 uqadd z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 uqadd z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 uqadd z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 uqadd z31.s, z31.s, #65280
# CHECK-NEXT: 1 2 1.00 uqdecb w0
# CHECK-NEXT: 1 2 1.00 uqdecb w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecb w0, pow2
@@ -4629,10 +4629,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdecd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecd x0, pow2
# CHECK-NEXT: 1 2 1.00 uqdecd x0, vl1
-# CHECK-NEXT: 1 2 0.50 uqdecd z0.d
-# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, pow2
-# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 uqdecd z0.d
+# CHECK-NEXT: 2 2 0.50 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: 2 2 0.50 uqdecd z0.d, pow2
+# CHECK-NEXT: 2 2 0.50 uqdecd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqdech w0
# CHECK-NEXT: 1 2 1.00 uqdech w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdech w0, pow2
@@ -4642,10 +4642,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdech x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdech x0, pow2
# CHECK-NEXT: 1 2 1.00 uqdech x0, vl1
-# CHECK-NEXT: 1 2 0.50 uqdech z0.h
-# CHECK-NEXT: 1 2 0.50 uqdech z0.h, all, mul #16
-# CHECK-NEXT: 1 2 0.50 uqdech z0.h, pow2
-# CHECK-NEXT: 1 2 0.50 uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 uqdech z0.h
+# CHECK-NEXT: 2 2 0.50 uqdech z0.h, all, mul #16
+# CHECK-NEXT: 2 2 0.50 uqdech z0.h, pow2
+# CHECK-NEXT: 2 2 0.50 uqdech z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecp wzr, p15.b
# CHECK-NEXT: 1 2 1.00 uqdecp wzr, p15.d
# CHECK-NEXT: 1 2 1.00 uqdecp wzr, p15.h
@@ -4654,9 +4654,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdecp x0, p0.d
# CHECK-NEXT: 1 2 1.00 uqdecp x0, p0.h
# CHECK-NEXT: 1 2 1.00 uqdecp x0, p0.s
-# CHECK-NEXT: 2 7 2.00 uqdecp z0.d, p0.d
-# CHECK-NEXT: 2 7 2.00 uqdecp z0.h, p0.h
-# CHECK-NEXT: 2 7 2.00 uqdecp z0.s, p0.s
+# CHECK-NEXT: 3 7 2.00 uqdecp z0.d, p0.d
+# CHECK-NEXT: 3 7 2.00 uqdecp z0.h, p0.h
+# CHECK-NEXT: 3 7 2.00 uqdecp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 uqdecw w0
# CHECK-NEXT: 1 2 1.00 uqdecw w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecw w0, pow2
@@ -4666,10 +4666,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqdecw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqdecw x0, pow2
# CHECK-NEXT: 1 2 1.00 uqdecw x0, vl1
-# CHECK-NEXT: 1 2 0.50 uqdecw z0.s
-# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, pow2
-# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 uqdecw z0.s
+# CHECK-NEXT: 2 2 0.50 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: 2 2 0.50 uqdecw z0.s, pow2
+# CHECK-NEXT: 2 2 0.50 uqdecw z0.s, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqincb w0
# CHECK-NEXT: 1 2 1.00 uqincb w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincb w0, pow2
@@ -4688,10 +4688,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqincd x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincd x0, pow2
# CHECK-NEXT: 1 2 1.00 uqincd x0, vl1
-# CHECK-NEXT: 1 2 0.50 uqincd z0.d
-# CHECK-NEXT: 1 2 0.50 uqincd z0.d, all, mul #16
-# CHECK-NEXT: 1 2 0.50 uqincd z0.d, pow2
-# CHECK-NEXT: 1 2 0.50 uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 uqincd z0.d
+# CHECK-NEXT: 2 2 0.50 uqincd z0.d, all, mul #16
+# CHECK-NEXT: 2 2 0.50 uqincd z0.d, pow2
+# CHECK-NEXT: 2 2 0.50 uqincd z0.d, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqinch w0
# CHECK-NEXT: 1 2 1.00 uqinch w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqinch w0, pow2
@@ -4701,10 +4701,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqinch x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqinch x0, pow2
# CHECK-NEXT: 1 2 1.00 uqinch x0, vl1
-# CHECK-NEXT: 1 2 0.50 uqinch z0.h
-# CHECK-NEXT: 1 2 0.50 uqinch z0.h, all, mul #16
-# CHECK-NEXT: 1 2 0.50 uqinch z0.h, pow2
-# CHECK-NEXT: 1 2 0.50 uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 uqinch z0.h
+# CHECK-NEXT: 2 2 0.50 uqinch z0.h, all, mul #16
+# CHECK-NEXT: 2 2 0.50 uqinch z0.h, pow2
+# CHECK-NEXT: 2 2 0.50 uqinch z0.h, pow2, mul #16
# CHECK-NEXT: 1 2 1.00 uqincp wzr, p15.b
# CHECK-NEXT: 1 2 1.00 uqincp wzr, p15.d
# CHECK-NEXT: 1 2 1.00 uqincp wzr, p15.h
@@ -4713,9 +4713,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqincp x0, p0.d
# CHECK-NEXT: 1 2 1.00 uqincp x0, p0.h
# CHECK-NEXT: 1 2 1.00 uqincp x0, p0.s
-# CHECK-NEXT: 2 7 2.00 uqincp z0.d, p0.d
-# CHECK-NEXT: 2 7 2.00 uqincp z0.h, p0.h
-# CHECK-NEXT: 2 7 2.00 uqincp z0.s, p0.s
+# CHECK-NEXT: 3 7 2.00 uqincp z0.d, p0.d
+# CHECK-NEXT: 3 7 2.00 uqincp z0.h, p0.h
+# CHECK-NEXT: 3 7 2.00 uqincp z0.s, p0.s
# CHECK-NEXT: 1 2 1.00 uqincw w0
# CHECK-NEXT: 1 2 1.00 uqincw w0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincw w0, pow2
@@ -4725,62 +4725,62 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uqincw x0, all, mul #16
# CHECK-NEXT: 1 2 1.00 uqincw x0, pow2
# CHECK-NEXT: 1 2 1.00 uqincw x0, vl1
-# CHECK-NEXT: 1 2 0.50 uqincw z0.s
-# CHECK-NEXT: 1 2 0.50 uqincw z0.s, all, mul #16
-# CHECK-NEXT: 1 2 0.50 uqincw z0.s, pow2
-# CHECK-NEXT: 1 2 0.50 uqincw z0.s, pow2, mul #16
-# CHECK-NEXT: 1 2 0.50 uqsub z0.b, z0.b, #0
-# CHECK-NEXT: 1 2 0.50 uqsub z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, #0
-# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 uqsub z0.h, z0.h, #0
-# CHECK-NEXT: 1 2 0.50 uqsub z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 uqsub z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 uqsub z0.s, z0.s, #0
-# CHECK-NEXT: 1 2 0.50 uqsub z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: 1 2 0.50 uqsub z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 uqsub z31.b, z31.b, #255
-# CHECK-NEXT: 1 2 0.50 uqsub z31.d, z31.d, #65280
-# CHECK-NEXT: 1 2 0.50 uqsub z31.h, z31.h, #65280
-# CHECK-NEXT: 1 2 0.50 uqsub z31.s, z31.s, #65280
+# CHECK-NEXT: 2 2 0.50 uqincw z0.s
+# CHECK-NEXT: 2 2 0.50 uqincw z0.s, all, mul #16
+# CHECK-NEXT: 2 2 0.50 uqincw z0.s, pow2
+# CHECK-NEXT: 2 2 0.50 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 2 2 0.50 uqsub z0.b, z0.b, #0
+# CHECK-NEXT: 2 2 0.50 uqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 uqsub z0.d, z0.d, #0
+# CHECK-NEXT: 2 2 0.50 uqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 uqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 uqsub z0.h, z0.h, #0
+# CHECK-NEXT: 2 2 0.50 uqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 uqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 uqsub z0.s, z0.s, #0
+# CHECK-NEXT: 2 2 0.50 uqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 2 2 0.50 uqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 uqsub z31.b, z31.b, #255
+# CHECK-NEXT: 2 2 0.50 uqsub z31.d, z31.d, #65280
+# CHECK-NEXT: 2 2 0.50 uqsub z31.h, z31.h, #65280
+# CHECK-NEXT: 2 2 0.50 uqsub z31.s, z31.s, #65280
# CHECK-NEXT: 1 3 0.50 usdot z0.s, z1.b, z31.b
# CHECK-NEXT: 1 3 0.50 usdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: 1 3 0.50 usmmla z0.s, z1.b, z2.b
-# CHECK-NEXT: 1 2 0.50 uunpkhi z31.d, z31.s
-# CHECK-NEXT: 1 2 0.50 uunpkhi z31.h, z31.b
-# CHECK-NEXT: 1 2 0.50 uunpkhi z31.s, z31.h
-# CHECK-NEXT: 1 2 0.50 uunpklo z31.d, z31.s
-# CHECK-NEXT: 1 2 0.50 uunpklo z31.h, z31.b
-# CHECK-NEXT: 1 2 0.50 uunpklo z31.s, z31.h
-# CHECK-NEXT: 1 2 1.00 uxtb z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 1.00 uxtb z0.h, p0/m, z0.h
-# CHECK-NEXT: 1 2 1.00 uxtb z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 2 1.00 uxtb z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 1.00 uxtb z31.h, p7/m, z31.h
-# CHECK-NEXT: 1 2 1.00 uxtb z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 uxth z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 1.00 uxth z0.s, p0/m, z0.s
-# CHECK-NEXT: 1 2 1.00 uxth z31.d, p7/m, z31.d
-# CHECK-NEXT: 1 2 1.00 uxth z31.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 uxtw z0.d, p0/m, z0.d
-# CHECK-NEXT: 1 2 1.00 uxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 0.50 uunpkhi z31.d, z31.s
+# CHECK-NEXT: 2 2 0.50 uunpkhi z31.h, z31.b
+# CHECK-NEXT: 2 2 0.50 uunpkhi z31.s, z31.h
+# CHECK-NEXT: 2 2 0.50 uunpklo z31.d, z31.s
+# CHECK-NEXT: 2 2 0.50 uunpklo z31.h, z31.b
+# CHECK-NEXT: 2 2 0.50 uunpklo z31.s, z31.h
+# CHECK-NEXT: 2 2 1.00 uxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 1.00 uxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 2 1.00 uxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 2 1.00 uxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 1.00 uxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 2 1.00 uxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 1.00 uxth z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 1.00 uxth z0.s, p0/m, z0.s
+# CHECK-NEXT: 2 2 1.00 uxth z31.d, p7/m, z31.d
+# CHECK-NEXT: 2 2 1.00 uxth z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 2 1.00 uxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: 2 2 1.00 uxtw z31.d, p7/m, z31.d
# CHECK-NEXT: 1 2 1.00 uzp1 p15.b, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 uzp1 p15.d, p15.d, p15.d
# CHECK-NEXT: 1 2 1.00 uzp1 p15.h, p15.h, p15.h
# CHECK-NEXT: 1 2 1.00 uzp1 p15.s, p15.s, p15.s
-# CHECK-NEXT: 1 2 0.50 uzp1 z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 uzp1 z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 uzp1 z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 uzp1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 uzp1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 uzp1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 uzp1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 uzp1 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 uzp2 p15.b, p15.b, p15.b
# CHECK-NEXT: 1 2 1.00 uzp2 p15.d, p15.d, p15.d
# CHECK-NEXT: 1 2 1.00 uzp2 p15.h, p15.h, p15.h
# CHECK-NEXT: 1 2 1.00 uzp2 p15.s, p15.s, p15.s
-# CHECK-NEXT: 1 2 0.50 uzp2 z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 uzp2 z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 uzp2 z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 uzp2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 uzp2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 uzp2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 uzp2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 uzp2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 2.00 whilele p0.b, w30, wzr
# CHECK-NEXT: 1 3 2.00 whilelo p15.d, xzr, x30
# CHECK-NEXT: 1 3 2.00 whilels p0.h, w30, wzr
@@ -4795,14 +4795,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 zip1 p15.d, p15.d, p15.d
# CHECK-NEXT: 1 2 1.00 zip1 p15.h, p15.h, p15.h
# CHECK-NEXT: 1 2 1.00 zip1 p15.s, p15.s, p15.s
-# CHECK-NEXT: 1 2 0.50 zip1 z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 zip1 z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 zip1 z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 zip1 z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 zip1 z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 zip1 z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 zip1 z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 zip1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 zip1 z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 zip1 z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 zip1 z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 zip1 z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 zip1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 zip1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 zip1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 zip1 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 zip2 p0.b, p0.b, p0.b
# CHECK-NEXT: 1 2 1.00 zip2 p0.d, p0.d, p0.d
# CHECK-NEXT: 1 2 1.00 zip2 p0.h, p0.h, p0.h
@@ -4811,14 +4811,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 1.00 zip2 p15.d, p15.d, p15.d
# CHECK-NEXT: 1 2 1.00 zip2 p15.h, p15.h, p15.h
# CHECK-NEXT: 1 2 1.00 zip2 p15.s, p15.s, p15.s
-# CHECK-NEXT: 1 2 0.50 zip2 z0.b, z0.b, z0.b
-# CHECK-NEXT: 1 2 0.50 zip2 z0.d, z0.d, z0.d
-# CHECK-NEXT: 1 2 0.50 zip2 z0.h, z0.h, z0.h
-# CHECK-NEXT: 1 2 0.50 zip2 z0.s, z0.s, z0.s
-# CHECK-NEXT: 1 2 0.50 zip2 z31.b, z31.b, z31.b
-# CHECK-NEXT: 1 2 0.50 zip2 z31.d, z31.d, z31.d
-# CHECK-NEXT: 1 2 0.50 zip2 z31.h, z31.h, z31.h
-# CHECK-NEXT: 1 2 0.50 zip2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 2 0.50 zip2 z0.b, z0.b, z0.b
+# CHECK-NEXT: 2 2 0.50 zip2 z0.d, z0.d, z0.d
+# CHECK-NEXT: 2 2 0.50 zip2 z0.h, z0.h, z0.h
+# CHECK-NEXT: 2 2 0.50 zip2 z0.s, z0.s, z0.s
+# CHECK-NEXT: 2 2 0.50 zip2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 2 2 0.50 zip2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 2 2 0.50 zip2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 2 2 0.50 zip2 z31.s, z31.s, z31.s
# CHECK: Resources:
# CHECK-NEXT: [0.0] - V1UnitB
@@ -4842,62 +4842,62 @@ zip2 z31.s, z31.s, z31.s
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11]
-# CHECK-NEXT: - - - - - - - 427.00 1140.50 1140.50 802.50 7.50 196.00 196.00 1778.25 1363.25 541.75 541.75
+# CHECK-NEXT: - - - - - - - 427.00 1140.50 1140.50 802.50 7.50 196.00 196.00 1683.25 1274.25 1681.75 1272.75
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7.0] [7.1] [8] [9] [10] [11] Instructions:
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z0.b, p0/m, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - abs z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z0.s, z1.s, z2.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.b, p5/m, z21.b, z10.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.b, z10.b, z21.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.d, p5/m, z21.d, z10.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.d, z10.d, z21.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.h, p5/m, z21.h, z10.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.h, z10.h, z21.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.s, p5/m, z21.s, z10.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z21.s, z10.s, z21.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.b, p3/m, z23.b, z13.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.b, z13.b, z8.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.d, p3/m, z23.d, z13.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.d, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.h, p3/m, z23.h, z13.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.h, z13.h, z8.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.s, p3/m, z23.s, z13.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z23.s, z13.s, z8.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.s, z31.s, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - add z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 abs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z0.s, z1.s, z2.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.b, z10.b, z21.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.d, z10.d, z21.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.h, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z21.s, z10.s, z21.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.b, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.h, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z23.s, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 add z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - addpl sp, sp, #31
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - addpl x0, x0, #-32
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - addpl x21, x21, #0
@@ -4906,104 +4906,104 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - addvl x0, x0, #-32
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - addvl x21, x21, #0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - addvl x23, x8, #-1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, lsl #1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, lsl #2]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, lsl #3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, sxtw #1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, sxtw #2]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, sxtw #3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, sxtw]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, uxtw #1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, uxtw #2]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, uxtw #3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d, uxtw]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.d, [z0.d, z0.d]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.s, [z0.s, z0.s, lsl #1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.s, [z0.s, z0.s, lsl #2]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.s, [z0.s, z0.s, lsl #3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - adr z0.s, [z0.s, z0.s]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.d, [z0.d, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.s, [z0.s, z0.s, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.s, [z0.s, z0.s, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.s, [z0.s, z0.s, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 adr z0.s, [z0.s, z0.s]
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - and p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z0.d, z0.d, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z0.d, z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z0.s, z0.s, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z0.s, z0.s, #0xfffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z23.d, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z23.h, z23.h, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z23.h, z23.h, #0xfff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z5.b, z5.b, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - and z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 and z5.b, z5.b, #0xf9
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - ands p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - andv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - andv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - andv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - andv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.b, p0/m, z0.b, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.b, p0/m, z0.b, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.b, z0.b, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.b, z1.b, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.d, p0/m, z0.d, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.d, z0.d, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.h, p0/m, z0.h, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.h, p0/m, z0.h, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.h, z0.h, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.h, z1.h, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.s, p0/m, z0.s, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.s, p0/m, z0.s, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.s, z0.s, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z0.s, z1.s, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.b, p0/m, z31.b, #8
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.b, z31.b, #8
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.d, p0/m, z31.d, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.d, z31.d, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.h, p0/m, z31.h, #16
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.h, z31.h, #16
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asr z31.s, z31.s, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z0.b, p0/m, z0.b, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z0.d, p0/m, z0.d, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z0.h, p0/m, z0.h, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z0.s, p0/m, z0.s, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z31.b, p0/m, z31.b, #8
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z31.d, p0/m, z31.d, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z31.h, p0/m, z31.h, #16
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrd z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - asrr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - bfcvt z0.h, p0/m, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - bfcvtnt z0.h, p0/m, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfdot z0.s, z1.h, z2.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfdot z0.s, z1.h, z2.h[0]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfdot z0.s, z1.h, z2.h[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalb z0.s, z1.h, z2.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalb z0.s, z1.h, z2.h[0]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalb z0.s, z1.h, z2.h[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalb z10.s, z21.h, z14.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalb z21.s, z14.h, z3.h[2]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalt z0.s, z1.h, z2.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalt z0.s, z1.h, z2.h[0]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalt z0.s, z1.h, z2.h[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalt z0.s, z1.h, z7.h[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmlalt z14.s, z10.h, z21.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 andv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 andv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 andv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 andv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asr z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrd z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfdot z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfdot z0.s, z1.h, z2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalb z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalb z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalb z10.s, z21.h, z14.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalb z21.s, z14.h, z3.h[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalt z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalt z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalt z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmlalt z14.s, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - bic p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - bic p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bic z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bic z23.d, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bic z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bic z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bic z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - bic z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bic z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bic z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bic z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bic z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bic z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 bic z31.s, p7/m, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - bics p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - bics p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - brka p0.b, p15/m, p15.b
@@ -5024,196 +5024,196 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - brkpb p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - brkpbs p0.b, p15/z, p1.b, p2.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - brkpbs p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta b0, p7, b0, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta d0, p7, d0, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta h0, p7, h0, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta s0, p7, s0, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clasta w0, p7, w0, z31.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clasta w0, p7, w0, z31.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clasta w0, p7, w0, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clasta x0, p7, x0, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta z0.b, p7, z0.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta z0.d, p7, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta z0.h, p7, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clasta z0.s, p7, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb b0, p7, b0, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb d0, p7, d0, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb h0, p7, h0, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb s0, p7, s0, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clastb w0, p7, w0, z31.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clastb w0, p7, w0, z31.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clastb w0, p7, w0, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - clastb x0, p7, x0, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb z0.b, p7, z0.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb z0.d, p7, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb z0.h, p7, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - clastb z0.s, p7, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cls z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cls z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cls z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cls z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - clz z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - clz z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - clz z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - clz z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpge p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpgt p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphi p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.b, p0/z, z1.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmphs p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmple p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplo p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.b, p0/z, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.b, p0/z, z0.b, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.d, p0/z, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.d, p0/z, z0.d, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.h, p0/z, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.h, p0/z, z0.h, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.s, p0/z, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.s, p0/z, z0.s, #127
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpls p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmplt p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.d, p0/z, z0.d, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.d, p0/z, z0.d, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.d, p0/z, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, #-16
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, #15
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, z0.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnot z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnot z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnot z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnot z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnt z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnt z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnt z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - cnt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta b0, p7, b0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clasta w0, p7, w0, z31.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clasta w0, p7, w0, z31.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clasta w0, p7, w0, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clasta x0, p7, x0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clasta z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb b0, p7, b0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clastb w0, p7, w0, z31.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clastb w0, p7, w0, z31.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clastb w0, p7, w0, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 clastb x0, p7, x0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 clastb z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cls z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cls z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cls z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cls z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 clz z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 clz z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 clz z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 clz z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpeq p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphi p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmphs p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmple p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplo p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpls p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmplt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - cmpne p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnot z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnot z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnot z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnot z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnt z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 cnt z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - cntb x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - cntb x0, #28
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - cntb x0, all, mul #16
@@ -5234,12 +5234,12 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - cntw x0, #28
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - cntw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - cntw x0, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - compact z31.d, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - compact z31.s, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.b, p7/m, w0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.d, p7/m, sp
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.h, p7/m, w0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.s, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 compact z31.d, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 compact z31.s, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.b, p7/m, w0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.d, p7/m, sp
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.h, p7/m, w0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.s, p7/m, wsp
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - ctermeq w30, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - ctermeq wzr, w30
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - ctermeq x30, xzr
@@ -5271,356 +5271,356 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decp xzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decp xzr, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decp xzr, p15.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - decp z31.d, p15.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - decp z31.h, p15.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - decp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 decp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 decp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 decp z31.s, p15.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - decw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, #256
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.h, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.s, #512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, #256
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.s, #512
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z0.b, w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z0.d, x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z31.h, wsp
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z31.s, wsp
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - dupm z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - dupm z0.s, #0xfffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - dupm z23.h, #0xfff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - dupm z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 dupm z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 dupm z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 dupm z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 dupm z5.b, #0xf9
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - eor p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z0.d, z0.d, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z0.d, z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z0.s, z0.s, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z0.s, z0.s, #0xfffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z23.d, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z23.h, z23.h, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z23.h, z23.h, #0xfff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z5.b, z5.b, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - eor z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 eor z5.b, z5.b, #0xf9
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - eors p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - eorv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - eorv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - eorv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - eorv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ext z31.b, z31.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ext z31.b, z31.b, z0.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fabd z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fabd z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fabd z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fabs z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fabs z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fabs z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facge p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facge p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facge p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facge p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facge p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facge p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facgt p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facgt p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facgt p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facgt p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facgt p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - facgt p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z0.s, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fadd z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 - - fadda d0, p7, d0, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 18.00 - - - fadda h0, p7, h0, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 10.00 - - - fadda s0, p7, s0, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - faddv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - faddv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - faddv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z0.d, p0/m, z0.d, z0.d, #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z0.h, p0/m, z0.h, z0.h, #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z0.s, p0/m, z0.s, z0.s, #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z31.d, p7/m, z31.d, z31.d, #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z31.h, p7/m, z31.h, z31.h, #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcadd z31.s, p7/m, z31.s, z31.s, #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmge p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.d, p0/z, z1.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.h, p0/z, z1.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.s, p0/z, z1.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.d, p0/m, z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.d, p0/m, z1.d, z2.d, #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.h, p0/m, z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.h, p0/m, z1.h, z2.h, #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.h, z0.h, z0.h[0], #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.s, p0/m, z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z0.s, p0/m, z1.s, z2.s, #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z21.s, z10.s, z5.s[1], #90
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z23.s, z13.s, z8.s[0], #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z29.d, p7/m, z30.d, z31.d, #180
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z29.h, p7/m, z30.h, z31.h, #180
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z29.s, p7/m, z30.s, z31.s, #180
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z31.d, p7/m, z31.d, z31.d, #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z31.h, p7/m, z31.h, z31.h, #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z31.h, z31.h, z7.h[3], #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fcmla z31.s, p7/m, z31.s, z31.s, #270
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmle p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmle p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmle p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmlt p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmlt p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmlt p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmne p0.d, p0/z, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmne p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmne p0.h, p0/z, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmne p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmne p0.s, p0/z, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmne p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmuo p0.d, p0/z, z0.d, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmuo p0.h, p0/z, z0.h, z1.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcmuo p0.s, p0/z, z0.s, z1.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvt z0.d, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvt z0.d, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvt z0.h, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvt z0.h, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvt z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvt z0.s, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzs z0.d, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzs z0.d, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzs z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzs z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzs z0.s, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzs z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzu z0.d, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzu z0.d, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzu z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - fcvtzu z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - fcvtzu z0.s, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - fcvtzu z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - - - fdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - - - fdiv z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - - - fdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - - - fdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - - - fdivr z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - - - fdivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fexpa z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fexpa z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fexpa z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmad z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmad z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmad z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmax z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmaxnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fmaxnmv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fmaxnmv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fmaxnmv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fmaxv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fmaxv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fmaxv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmin z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z0.d, p0/m, z0.d, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z0.h, p0/m, z0.h, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z0.s, p0/m, z0.s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fminnm z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fminnmv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fminnmv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fminnmv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - fminv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 - - fminv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 - - fminv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.d, z1.d, z7.d[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.h, z1.h, z7.h[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmla z0.s, z1.s, z7.s[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmls z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmls z0.d, z1.d, z7.d[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmls z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmls z0.h, z1.h, z7.h[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmls z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmls z0.s, z1.s, z7.s[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.d, #-10.00000000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.d, #0.12500000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.d, p0/m, #-10.00000000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.d, p0/m, #0.12500000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.h, #-0.12500000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.h, p0/m, #-0.12500000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.s, #-0.12500000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmov z0.s, p0/m, #-0.12500000
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmsb z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmsb z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmsb z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.d, z0.d, z0.d[0]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.h, z0.h, z0.h[0]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.s, z0.s, z0.s[0]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z0.s, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z31.d, p7/m, z31.d, #2.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z31.d, z31.d, z15.d[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z31.h, p7/m, z31.h, #2.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z31.h, z31.h, z7.h[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z31.s, p7/m, z31.s, #2.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmul z31.s, z31.s, z7.s[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmulx z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmulx z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fmulx z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fneg z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fneg z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fneg z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmad z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmad z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmad z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmla z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmla z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmla z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmls z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmls z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmls z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmsb z0.d, p7/m, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmsb z0.h, p7/m, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fnmsb z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpe z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpe z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpe z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frecps z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frecps z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frecps z0.s, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpx z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpx z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frecpx z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frinta z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frinta z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frinta z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frinti z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frinti z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frinti z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintm z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintm z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintm z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintn z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintn z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintn z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintp z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintp z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintp z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintx z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintx z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintx z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintz z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintz z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frintz z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frsqrte z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frsqrte z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - frsqrte z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frsqrts z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frsqrts z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - frsqrts z0.s, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fscale z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fscale z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fscale z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - - - fsqrt z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - - - fsqrt z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - - - fsqrt z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z0.s, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsub z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z0.d, p0/m, z0.d, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z0.h, p0/m, z0.h, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z0.s, p0/m, z0.s, #0.5
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z31.d, p7/m, z31.d, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z31.h, p7/m, z31.h, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - fsubr z31.s, p7/m, z31.s, #1.0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftmad z0.d, z0.d, z31.d, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftmad z0.h, z0.h, z31.h, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftmad z0.s, z0.s, z31.s, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftsmul z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftsmul z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftsmul z0.s, z1.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftssel z0.d, z1.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftssel z0.h, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ftssel z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 eorv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 eorv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 eorv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 eorv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ext z31.b, z31.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ext z31.b, z31.b, z0.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fabd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fabd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fabd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fabs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fabs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fabs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - facgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fadd z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.50 1.50 1.50 1.50 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 18.00 - 18.00 - fadda h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 10.00 - 10.00 - fadda s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 faddv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 3.00 3.00 faddv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 2.50 2.50 faddv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcadd z0.d, p0/m, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcadd z0.h, p0/m, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcadd z0.s, p0/m, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcadd z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcadd z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcadd z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmeq p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmeq p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmeq p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmeq p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmeq p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmeq p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.d, p0/m, z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.h, p0/m, z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.h, p0/m, z1.h, z2.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.h, z0.h, z0.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.s, p0/m, z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z0.s, p0/m, z1.s, z2.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z21.s, z10.s, z5.s[1], #90
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z23.s, z13.s, z8.s[0], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z29.d, p7/m, z30.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z29.h, p7/m, z30.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z29.s, p7/m, z30.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z31.h, z31.h, z7.h[3], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fcmla z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmle p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmle p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmle p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmlt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmlt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmlt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmne p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmne p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmne p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmne p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmne p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmne p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmuo p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmuo p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcmuo p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvt z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvt z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvt z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvt z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - 14.00 - fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - 12.00 - fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - 9.00 - fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - 14.00 - fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - 12.00 - fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - 9.00 - fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fexpa z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fexpa z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fexpa z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmax z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 3.00 3.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 2.50 2.50 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 fmaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 3.00 3.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 2.50 2.50 fmaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmin z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 fminnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 3.00 3.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 2.50 2.50 fminnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 fminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 3.00 3.00 3.00 3.00 fminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.50 2.50 2.50 2.50 fminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.d, #-10.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.d, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.h, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.s, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.d, z0.d, z0.d[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.h, z0.h, z0.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.s, z0.s, z0.s[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z31.d, p7/m, z31.d, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z31.d, z31.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z31.h, p7/m, z31.h, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z31.h, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z31.s, p7/m, z31.s, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmul z31.s, z31.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmulx z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmulx z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmulx z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fneg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fneg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fneg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fnmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 frecps z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 frecps z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 frecps z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpx z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpx z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frecpx z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinta z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinta z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinta z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinti z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinti z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frinti z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintm z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintm z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintm z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintn z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintn z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintn z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintp z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintp z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintp z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintx z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintx z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintx z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintz z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintz z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frintz z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 frsqrts z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 frsqrts z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 frsqrts z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fscale z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fscale z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fscale z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 14.00 - 14.00 - fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 12.00 - 12.00 - fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 9.00 - 9.00 - fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsub z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fsubr z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftmad z0.d, z0.d, z31.d, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftmad z0.h, z0.h, z31.h, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftmad z0.s, z0.s, z31.s, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftsmul z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftsmul z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftsmul z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftssel z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftssel z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 ftssel z0.s, z1.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incb x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incb x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incb x0, all, mul #16
@@ -5631,15 +5631,15 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incd x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incd x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incd x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 incd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 incd z0.d, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - inch x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - inch z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - inch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 inch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 inch z0.h, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp x0, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp x0, p0.h
@@ -5648,76 +5648,76 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp xzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp xzr, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incp xzr, p15.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - incp z31.d, p15.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - incp z31.h, p15.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - incp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 incp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 incp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 incp z31.s, p15.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - incw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - incw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z0.b, #0, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - index z0.d, #0, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z0.h, #0, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z0.h, w0, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z0.s, #0, #0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z21.b, w10, w21
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z21.d, x10, x21
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z21.s, w10, w21
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.b, #13, w8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.b, w13, #8
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z23.d, #13, x8
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z23.d, x13, #8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.h, #13, w8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.h, w13, #8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.s, #13, w8
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z23.s, w13, #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z31.b, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.b, #-1, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.b, wzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.b, wzr, wzr
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - index z31.d, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z31.d, #-1, xzr
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z31.d, xzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 2.00 - - - index z31.d, xzr, xzr
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z31.h, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.h, #-1, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.h, wzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.h, wzr, wzr
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - index z31.s, #-1, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.s, #-1, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.s, wzr, #-1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - index z31.s, wzr, wzr
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.b, w0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.d, x0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.h, w0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z0.s, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - insr z31.b, b31
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z31.b, wzr
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - insr z31.d, d31
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z31.d, xzr
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - insr z31.h, h31
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z31.h, wzr
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - insr z31.s, s31
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - insr z31.s, wzr
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lasta b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lasta d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lasta h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lasta s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lasta w0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lasta w0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lasta w0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lasta x0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lastb b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lastb d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lastb h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lastb s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lastb w0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lastb w0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lastb w0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - lastb x0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 incw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 incw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - index z0.b, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - index z0.d, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - index z0.h, #0, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z0.h, w0, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - index z0.s, #0, #0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z21.b, w10, w21
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - 1.00 - index z21.d, x10, x21
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z21.s, w10, w21
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z23.b, #13, w8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z23.b, w13, #8
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - 1.00 - index z23.d, #13, x8
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - 1.00 - index z23.d, x13, #8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z23.h, #13, w8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z23.h, w13, #8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z23.s, #13, w8
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z23.s, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - index z31.b, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.b, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.b, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.b, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - index z31.d, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - 1.00 - index z31.d, #-1, xzr
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - 1.00 - index z31.d, xzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - 1.00 - index z31.d, xzr, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - index z31.h, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.h, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.h, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.h, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - index z31.s, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.s, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.s, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - 1.00 - index z31.s, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z0.b, w0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z0.d, x0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z0.h, w0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 insr z31.b, b31
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z31.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 insr z31.d, d31
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z31.d, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 insr z31.h, h31
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z31.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 insr z31.s, s31
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 insr z31.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lasta b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lasta d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lasta h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lasta s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lasta w0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lasta w0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lasta w0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lasta x0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lastb b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lastb d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lastb h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lastb s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lastb w0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lastb w0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lastb w0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - 1.00 lastb x0, p7, z31.d
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.b }, p0/z, [sp, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.b }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ld1b { z0.b }, p0/z, [x0]
@@ -5916,66 +5916,66 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
# CHECK-NEXT: - - - - - - - 4.00 4.00 4.00 - - - - 3.00 3.00 3.00 3.00 ld1w { z31.s }, p7/z, [z31.s, #124]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z0.b, z1.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z0.b, z1.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2b { z5.b, z6.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z0.d, z1.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 1.00 1.00 - - ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z0.h, z1.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 1.00 1.00 - - ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z0.s, z1.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3b { z0.b - z2.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3b { z0.b - z2.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3b { z5.b - z7.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3d { z0.d - z2.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3h { z0.h - z2.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3w { z0.s - z2.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 3.00 3.00 - - ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
-# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 3.00 3.00 - - ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4b { z0.b - z3.b }, p0/z, [x0, x0]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4b { z0.b - z3.b }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4b { z5.b - z8.b }, p3/z, [x17, x16]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4d { z0.d - z3.d }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4h { z0.h - z3.h }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4w { z0.s - z3.s }, p0/z, [x0]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 4.00 4.00 - - ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 4.00 4.00 - - ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 0.50 0.50 0.50 0.50 ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 1.00 1.00 0.50 0.50 0.50 0.50 ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 0.50 0.50 ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - 1.50 1.50 1.50 1.50 ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 3.00 3.00 1.50 1.50 1.50 1.50 ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - 2.00 2.00 2.00 2.00 ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 4.00 4.00 2.00 2.00 2.00 2.00 ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z0.d }, p0/z, [x0, x0]
# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 - - - - 1.50 1.50 1.50 1.50 ldff1b { z0.d }, p0/z, [z0.d]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - ldff1b { z0.h }, p0/z, [x0, x0]
@@ -6153,172 +6153,172 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ldr z0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ldr z23, [x13, #255, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - ldr z31, [sp, #-256, mul vl]
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.b, p0/m, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.b, p0/m, z0.b, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.b, z1.b, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.d, p0/m, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.h, p0/m, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.h, p0/m, z0.h, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.h, z1.h, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.s, p0/m, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.s, p0/m, z0.s, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z0.s, z1.s, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.b, p0/m, z31.b, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.b, z31.b, #7
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.d, p0/m, z31.d, #63
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.d, z31.d, #63
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.h, p0/m, z31.h, #15
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.h, z31.h, #15
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.s, p0/m, z31.s, #31
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsl z31.s, z31.s, #31
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lslr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lslr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lslr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lslr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.b, p0/m, z0.b, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.b, p0/m, z0.b, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.b, z0.b, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.b, z1.b, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.d, p0/m, z0.d, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.d, z0.d, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.h, p0/m, z0.h, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.h, p0/m, z0.h, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.h, z0.h, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.h, z1.h, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.s, p0/m, z0.s, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.s, p0/m, z0.s, z1.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.s, z0.s, #1
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z0.s, z1.s, z2.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.b, p0/m, z31.b, #8
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.b, z31.b, #8
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.d, p0/m, z31.d, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.d, z31.d, #64
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.h, p0/m, z31.h, #16
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.h, z31.h, #16
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.s, p0/m, z31.s, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsr z31.s, z31.s, #32
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsrr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsrr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsrr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - lsrr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - mad z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - mla z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - mls z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.b, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.d, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.h, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsl z31.s, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsr z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - mad z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - mla z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - mls z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov p0.b, p0/m, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov p0.b, p0/z, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov p15.b, p15/m, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov p15.b, p15/z, p15.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.b, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.b, b0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.b, p0/m, b0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z0.b, p0/m, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.b, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.b, b0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.b, p0/m, b0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z0.b, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.b, p0/z, #127
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z0.b, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, #0xe0000000000003ff
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, #0xffffffffffff7fff
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, #32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, d0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, p0/m, d0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z0.d, p0/m, x0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, #0xe0000000000003ff
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, #0xffffffffffff7fff
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, #32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, p0/m, d0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z0.d, p0/m, x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z0.d, x0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, #-256
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, #32767
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, h0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, p0/m, h0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z0.h, p0/m, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.h, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, #-256
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, #32767
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, p0/m, h0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z0.h, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.h, p0/z, #32512
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z0.h, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.q, q0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.s, #0xffff7fff
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.s, #32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.s, p0/m, s0
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z0.s, p0/m, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z0.s, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.q, q0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.s, #0xffff7fff
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.s, #32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.s, p0/m, s0
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z0.s, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z0.s, s0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z0.s, w0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, p0/z, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, p0/z, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, p0/z, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, p0/z, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, p15/m, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.d, p15/m, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, p0/z, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, p0/z, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, p0/z, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, p0/z, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, p15/m, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.h, p15/m, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, p0/z, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, p0/z, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, p0/z, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, p0/z, #32512
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, p15/m, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z21.s, p15/m, #-32768
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.b, p15/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.b, p7/m, b31
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - movprfx z31, z6
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.b, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.d, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.h, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z21.s, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.b, p15/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.b, p7/m, b31
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 movprfx z31, z6
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.b, p7/m, wsp
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z31.b, wsp
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.b, z31.b[63]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.d, p15/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.d, p7/m, d31
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - movprfx z31.d, p7/z, z6.d
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.d, p7/m, sp
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.b, z31.b[63]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.d, p15/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.d, p7/m, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.d, p7/m, sp
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z31.d, sp
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.d, z31.d[7]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.h, p15/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.h, p7/m, h31
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.h, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.d, z31.d[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.h, p15/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.h, p7/m, h31
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.h, p7/m, wsp
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z31.h, wsp
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.h, z31.h[31]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.s, p15/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.s, p7/m, s31
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 - - mov z31.s, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.h, z31.h[31]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.s, p15/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.s, p7/m, s31
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.50 0.50 0.50 0.50 mov z31.s, p7/m, wsp
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - mov z31.s, wsp
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z31.s, z31.s[15]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, #-1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, p0/z, #-1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, p0/z, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, p0/z, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.b, p15/m, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.d, #-6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.h, #-6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.q, z17.q[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - mov z5.s, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z31.s, z31.s[15]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, p0/z, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.b, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.d, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.h, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.q, z17.q[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 mov z5.s, #-6
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - movs p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - movs p0.b, p0/z, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - movs p15.b, p15.b
@@ -6329,44 +6329,44 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL2
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - msr ZCR_EL1, x3
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - msb z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - msb z0.d, p0/m, z0.d, z0.d
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - msr ZCR_EL12, x3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - msr ZCR_EL2, x3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - msr ZCR_EL3, x3
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - mul z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z31.b, z31.b, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z31.b, z31.b, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - mul z31.d, z31.d, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - mul z31.d, z31.d, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z31.h, z31.h, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z31.h, z31.h, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z31.s, z31.s, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - mul z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.b, z31.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - mul z31.d, z31.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - mul z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.h, z31.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.s, z31.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.s, z31.s, #127
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - nand p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - nand p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - nands p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - nands p15.b, p15/z, p15.b, p15.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z0.b, p0/m, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - neg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 neg z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - nor p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - nor p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - nors p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - nors p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - not p0.b, p0/z, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - not p15.b, p15/z, p15.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - not z31.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - not z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - not z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - not z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 not z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 not z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 not z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 not z31.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - nots p0.b, p0/z, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - nots p15.b, p15/z, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - orn p0.b, p0/z, p0.b, p0.b
@@ -6374,24 +6374,24 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - orns p0.b, p0/z, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - orns p15.b, p15/z, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - orr p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z0.d, z0.d, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z0.d, z0.d, #0xfffffffffffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z0.s, z0.s, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z0.s, z0.s, #0xfffffff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z23.d, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z23.h, z23.h, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z23.h, z23.h, #0xfff9
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z5.b, z5.b, #0x6
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - orr z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 orr z5.b, z5.b, #0xf9
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - orrs p0.b, p0/z, p0.b, p1.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - orv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - orv d0, p7, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - orv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 - - orv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 orv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 orv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 orv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 2.00 2.00 2.00 orv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - pfalse p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - pfirst p0.b, p15, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - pfirst p15.b, p15, p15.b
@@ -6484,10 +6484,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - punpkhi p15.h, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - punpklo p0.h, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - punpklo p15.h, p15.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rbit z0.b, p7/m, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rbit z0.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rbit z0.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rbit z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rbit z0.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rbit z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rbit z0.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rbit z0.s, p7/m, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdffr p0.b
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - rdffr p0.b, p0/z
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdffr p15.b
@@ -6498,95 +6498,95 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdvl x21, #-32
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdvl x23, #31
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - rdvl xzr, #-1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rev z0.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rev z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rev z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - rev z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - revb z0.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - revb z0.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - revb z0.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - revh z0.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - revh z0.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - revw z0.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - saddv d0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - saddv d0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - saddv d0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - scvtf z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - scvtf z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - scvtf z0.h, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - scvtf z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - scvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - sdiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - sdiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - sdivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - sdivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sdot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - sdot z0.d, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sdot z0.s, z1.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sdot z0.s, z1.b, z7.b[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sel z23.b, p11, z13.b, z8.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sel z23.d, p11, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sel z23.h, p11, z13.h, z8.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sel z23.s, p11, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rev z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rev z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rev z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 rev z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 revb z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 revb z0.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 revb z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 revh z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 revh z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 revw z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 saddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 saddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 saddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - 20.00 - sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - 11.00 - sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - 20.00 - sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - 11.00 - sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sdot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sel z23.b, p11, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sel z23.d, p11, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sel z23.h, p11, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sel z23.s, p11, z13.s, z8.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - setffr
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z0.b, z0.b, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z0.d, z0.d, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z0.h, z0.h, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z0.s, z0.s, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.b, z31.b, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.d, z31.d, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.h, z31.h, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smax z31.s, z31.s, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - smaxv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - smaxv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - smaxv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.b, z0.b, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.d, z0.d, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.h, z0.h, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z0.s, z0.s, #-128
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.b, z31.b, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.d, z31.d, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.h, z31.h, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smin z31.s, z31.s, #127
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - sminv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - sminv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - sminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z0.b, z0.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z0.d, z0.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z0.h, z0.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z0.s, z0.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smax z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 smaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 smaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 smaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z0.b, z0.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z0.d, z0.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z0.h, z0.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z0.s, z0.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smin z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 sminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 sminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 sminv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - smmla z0.s, z1.b, z2.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - smulh z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - smulh z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - smulh z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - smulh z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - splice z31.b, p7, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - splice z31.d, p7, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - splice z31.h, p7, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - splice z31.s, p7, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqadd z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - smulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - smulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - smulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 splice z31.b, p7, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 splice z31.d, p7, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 splice z31.h, p7, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 splice z31.s, p7, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqadd z31.s, z31.s, #65280
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecb x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecb x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecb x0, all, mul #16
@@ -6605,10 +6605,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecd x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecd x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecd x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, all, mul #16
@@ -6618,10 +6618,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdech x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdech z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp x0, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp x0, p0.h
@@ -6630,9 +6630,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp xzr, p15.d, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp xzr, p15.h, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecp xzr, p15.s, wzr
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqdecp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqdecp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 sqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 sqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 sqdecp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, all, mul #16
@@ -6642,10 +6642,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqdecw x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqdecw z0.s, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincb x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincb x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincb x0, all, mul #16
@@ -6664,10 +6664,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincd x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincd x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincd x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, all, mul #16
@@ -6677,10 +6677,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqinch x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqinch z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp x0, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp x0, p0.h
@@ -6689,9 +6689,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp xzr, p15.d, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp xzr, p15.h, wzr
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincp xzr, p15.s, wzr
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqincp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqincp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - sqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 sqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 sqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 sqincp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, #14
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, all, mul #16
@@ -6701,25 +6701,25 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, w0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - sqincw x0, w0, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqincw z0.s, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sqsub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sqsub z31.s, z31.s, #65280
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.b }, p0, [x0, x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.b }, p0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 st1b { z0.d }, p0, [x0, x0]
@@ -6887,161 +6887,161 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 str z0, [x0]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 str z21, [x10, #-256, mul vl]
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 0.25 0.25 0.25 0.25 str z31, [sp, #255, mul vl]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.b, p5/m, z21.b, z10.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.b, z10.b, z21.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.d, p5/m, z21.d, z10.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.d, z10.d, z21.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.h, p5/m, z21.h, z10.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.h, z10.h, z21.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.s, p5/m, z21.s, z10.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z21.s, z10.s, z21.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.b, p3/m, z23.b, z13.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.b, z13.b, z8.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.d, p3/m, z23.d, z13.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.d, z13.d, z8.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.h, p3/m, z23.h, z13.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.h, z13.h, z8.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.s, p3/m, z23.s, z13.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z23.s, z13.s, z8.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.s, z31.s, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sub z31.s, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.b, p0/m, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.d, p0/m, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.h, p0/m, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.s, p0/m, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - subr z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.b, z10.b, z21.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.d, z10.d, z21.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.h, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z21.s, z10.s, z21.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.b, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.h, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z23.s, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sub z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 subr z31.s, z31.s, #65280
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sudot z0.s, z1.b, z7.b[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpkhi z31.d, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpkhi z31.h, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpkhi z31.s, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpklo z31.d, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpklo z31.h, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - sunpklo z31.s, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtb z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtb z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtb z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtb z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtb z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtb z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxth z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxth z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxth z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxth z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtw z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - sxtw z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl z31.b, { z31.b }, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl z31.d, { z31.d }, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl z31.h, { z31.h }, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - tbl z31.s, { z31.s }, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sunpkhi z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sunpkhi z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sunpkhi z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sunpklo z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sunpklo z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sunpklo z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxth z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxth z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxth z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxth z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 sxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbl z31.b, { z31.b }, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbl z31.d, { z31.d }, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbl z31.h, { z31.h }, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbl z31.s, { z31.s }, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn1 p15.b, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn1 p15.d, p15.d, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn1 p15.h, p15.h, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn1 p15.s, p15.s, p15.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn1 z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn1 z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn1 z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn1 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn2 p15.b, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn2 p15.d, p15.d, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn2 p15.h, p15.h, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - trn2 p15.s, p15.s, p15.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn2 z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn2 z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn2 z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - trn2 z31.s, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uabd z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uaddv d0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uaddv d0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uaddv d0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - ucvtf z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - - - ucvtf z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - ucvtf z0.h, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - ucvtf z0.s, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - ucvtf z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - udiv z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - udiv z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - - - udivr z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - - - udivr z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - udot z0.d, z1.h, z15.h[1]
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - udot z0.d, z1.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - udot z0.s, z1.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - udot z0.s, z1.b, z7.b[3]
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umax z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - umaxv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - umaxv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - umaxv s0, p7, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.b, p7/m, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.d, p7/m, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.h, p7/m, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - umin z31.s, p7/m, z31.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uminv b0, p7, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uminv h0, p7, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - uminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 trn2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 uaddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 uaddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 uaddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 4.00 - 4.00 - ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - 20.00 - udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - 11.00 - udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 20.00 - 20.00 - udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 11.00 - 11.00 - udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - udot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 udot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 udot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umax z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umax z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 umaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 umaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 umaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umin z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umin z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 uminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 uminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - 2.00 uminv s0, p7, z31.s
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - ummla z0.s, z1.b, z2.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - umulh z0.b, p7/m, z0.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - - - umulh z0.d, p7/m, z0.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - umulh z0.h, p7/m, z0.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - - - umulh z0.s, p7/m, z0.s, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqadd z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - umulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 2.00 - 2.00 - umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - umulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 1.00 - 1.00 - umulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqadd z31.s, z31.s, #65280
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecb w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecb w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecb w0, pow2
@@ -7060,10 +7060,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecd x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecd x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecd x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech w0, pow2
@@ -7073,10 +7073,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdech x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdech z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp wzr, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp wzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp wzr, p15.h
@@ -7085,9 +7085,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp x0, p0.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecp x0, p0.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqdecp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqdecp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 uqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 uqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 uqdecp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw w0, pow2
@@ -7097,10 +7097,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqdecw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqdecw z0.s, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincb w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincb w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincb w0, pow2
@@ -7119,10 +7119,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincd x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincd x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincd x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincd z0.d, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch w0, pow2
@@ -7132,10 +7132,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqinch x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqinch z0.h, pow2, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp wzr, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp wzr, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp wzr, p15.h
@@ -7144,9 +7144,9 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp x0, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp x0, p0.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincp x0, p0.s
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqincp z0.d, p0.d
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqincp z0.h, p0.h
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 1.00 - - uqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 uqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 uqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.50 0.50 0.50 0.50 uqincp z0.s, p0.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw w0
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw w0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw w0, pow2
@@ -7156,62 +7156,62 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw x0, all, mul #16
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw x0, pow2
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uqincw x0, vl1
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s, all, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s, pow2
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqincw z0.s, pow2, mul #16
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.b, z0.b, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.d, z0.d, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.d, z0.d, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.h, z0.h, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.h, z0.h, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.s, z0.s, #0
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.s, z0.s, #0, lsl #8
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.b, z31.b, #255
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.d, z31.d, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.h, z31.h, #65280
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uqsub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uqsub z31.s, z31.s, #65280
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 usdot z0.s, z1.b, z31.b
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 usdot z0.s, z1.b, z7.b[3]
# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - usmmla z0.s, z1.b, z2.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpkhi z31.d, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpkhi z31.h, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpkhi z31.s, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpklo z31.d, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpklo z31.h, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uunpklo z31.s, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtb z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtb z0.h, p0/m, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtb z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtb z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtb z31.h, p7/m, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtb z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxth z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxth z0.s, p0/m, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxth z31.d, p7/m, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxth z31.s, p7/m, z31.s
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtw z0.d, p0/m, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - uxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uunpkhi z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uunpkhi z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uunpkhi z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uunpklo z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uunpklo z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uunpklo z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxth z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxth z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxth z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxth z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - 1.00 uxtw z31.d, p7/m, z31.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp1 p15.b, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp1 p15.d, p15.d, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp1 p15.h, p15.h, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp1 p15.s, p15.s, p15.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp1 z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp1 z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp1 z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp1 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp2 p15.b, p15.b, p15.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp2 p15.d, p15.d, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp2 p15.h, p15.h, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - uzp2 p15.s, p15.s, p15.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp2 z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp2 z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp2 z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - uzp2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uzp2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - whilele p0.b, w30, wzr
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - whilelo p15.d, xzr, x30
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - whilels p0.h, w30, wzr
@@ -7226,14 +7226,14 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip1 p15.d, p15.d, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip1 p15.h, p15.h, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip1 p15.s, p15.s, p15.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip1 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip2 p0.b, p0.b, p0.b
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip2 p0.d, p0.d, p0.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip2 p0.h, p0.h, p0.h
@@ -7242,11 +7242,11 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip2 p15.d, p15.d, p15.d
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip2 p15.h, p15.h, p15.h
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - zip2 p15.s, p15.s, p15.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z0.b, z0.b, z0.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z0.d, z0.d, z0.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z0.h, z0.h, z0.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z0.s, z0.s, z0.s
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z31.b, z31.b, z31.b
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z31.d, z31.d, z31.d
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z31.h, z31.h, z31.h
-# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 - - zip2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 zip2 z31.s, z31.s, z31.s
More information about the llvm-commits
mailing list